[LoopVectorize] Induction variables: support arbitrary constant step.

Previously, only step values of -1 and +1 were supported for induction variables. This patch extends LV to support
arbitrary constant steps.
Initial patch by Alexey Volkov. Some bug fixes were added in the follow-up version.

Differential Revision: http://reviews.llvm.org/D6051 and http://reviews.llvm.org/D7193


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227557 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hao Liu
2015-01-30 05:02:21 +00:00
parent 7d3a44a692
commit e7769db118
4 changed files with 282 additions and 137 deletions

View File

@ -355,10 +355,9 @@ protected:
/// element. /// element.
virtual Value *getBroadcastInstrs(Value *V); virtual Value *getBroadcastInstrs(Value *V);
/// This function adds 0, 1, 2 ... to each vector element, starting at zero. /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). /// to each vector element of Val. The sequence starts at StartIndex.
/// The sequence starts at StartIndex. virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous /// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values. /// values within the current basic block or on loop invariant values.
@ -479,7 +478,7 @@ private:
bool IfPredicateStore = false) override; bool IfPredicateStore = false) override;
void vectorizeMemoryInstruction(Instruction *Instr) override; void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override; Value *getBroadcastInstrs(Value *V) override;
Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override; Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
Value *reverseVector(Value *Vec) override; Value *reverseVector(Value *Vec) override;
}; };
@ -603,11 +602,9 @@ public:
/// This enum represents the kinds of inductions that we support. /// This enum represents the kinds of inductions that we support.
enum InductionKind { enum InductionKind {
IK_NoInduction, ///< Not an induction variable. IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = 1. IK_IntInduction, ///< Integer induction variable. Step = C.
IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1. IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
}; };
// This enum represents the kind of minmax reduction. // This enum represents the kind of minmax reduction.
@ -697,12 +694,67 @@ public:
/// A struct for saving information about induction variables. /// A struct for saving information about induction variables.
struct InductionInfo { struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {} : StartValue(Start), IK(K), StepValue(Step) {
assert(IK != IK_NoInduction && "Not an induction");
assert(StartValue && "StartValue is null");
assert(StepValue && !StepValue->isZero() && "StepValue is zero");
assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
"StartValue is not a pointer for pointer induction");
assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
"StartValue is not an integer for integer induction");
assert(StepValue->getType()->isIntegerTy() &&
"StepValue is not an integer");
}
InductionInfo()
: StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
/// Get the consecutive direction. Returns:
/// 0 - unknown or non-consecutive.
/// 1 - consecutive and increasing.
/// -1 - consecutive and decreasing.
int getConsecutiveDirection() const {
if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
return StepValue->getSExtValue();
return 0;
}
/// Compute the transformed value of Index at offset StartValue using step
/// StepValue.
/// For integer induction, returns StartValue + Index * StepValue.
/// For pointer induction, returns StartValue[Index * StepValue].
/// FIXME: The newly created binary instructions should contain nsw/nuw
/// flags, which can be found from the original scalar operations.
Value *transform(IRBuilder<> &B, Value *Index) const {
switch (IK) {
case IK_IntInduction:
assert(Index->getType() == StartValue->getType() &&
"Index type does not match StartValue type");
if (StepValue->isMinusOne())
return B.CreateSub(StartValue, Index);
if (!StepValue->isOne())
Index = B.CreateMul(Index, StepValue);
return B.CreateAdd(StartValue, Index);
case IK_PtrInduction:
if (StepValue->isMinusOne())
Index = B.CreateNeg(Index);
else if (!StepValue->isOne())
Index = B.CreateMul(Index, StepValue);
return B.CreateGEP(StartValue, Index);
case IK_NoInduction:
default:
return nullptr;
}
}
/// Start value. /// Start value.
TrackingVH<Value> StartValue; TrackingVH<Value> StartValue;
/// Induction kind. /// Induction kind.
InductionKind IK; InductionKind IK;
/// Step value.
ConstantInt *StepValue;
}; };
/// ReductionList contains the reduction descriptors for all /// ReductionList contains the reduction descriptors for all
@ -822,9 +874,9 @@ private:
/// pattern corresponding to a min(X, Y) or max(X, Y). /// pattern corresponding to a min(X, Y) or max(X, Y).
static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I, static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
ReductionInstDesc &Prev); ReductionInstDesc &Prev);
/// Returns the induction kind of Phi. This function may return NoInduction /// Returns the induction kind of Phi and record the step. This function may
/// if the PHI is not an induction variable. /// return NoInduction if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi); InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
/// \brief Collect memory access with loop invariant strides. /// \brief Collect memory access with loop invariant strides.
/// ///
@ -1592,11 +1644,13 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf; return Shuf;
} }
Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
bool Negate) { Value *Step) {
assert(Val->getType()->isVectorTy() && "Must be a vector"); assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() && assert(Val->getType()->getScalarType()->isIntegerTy() &&
"Elem must be an integer"); "Elem must be an integer");
assert(Step->getType() == Val->getType()->getScalarType() &&
"Step has wrong type");
// Create the types. // Create the types.
Type *ITy = Val->getType()->getScalarType(); Type *ITy = Val->getType()->getScalarType();
VectorType *Ty = cast<VectorType>(Val->getType()); VectorType *Ty = cast<VectorType>(Val->getType());
@ -1604,15 +1658,18 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
SmallVector<Constant*, 8> Indices; SmallVector<Constant*, 8> Indices;
// Create a vector of consecutive numbers from zero to VF. // Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i) { for (int i = 0; i < VLen; ++i)
int64_t Idx = Negate ? (-i) : i; Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
}
// Add the consecutive indices to the vector value. // Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices); Constant *Cv = ConstantVector::get(Indices);
assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
return Builder.CreateAdd(Val, Cv, "induction"); Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
Step = Builder.CreateMul(Cv, Step);
return Builder.CreateAdd(Val, Step, "induction");
} }
/// \brief Find the operand of the GEP that should be checked for consecutive /// \brief Find the operand of the GEP that should be checked for consecutive
@ -1650,10 +1707,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
if (Phi && Inductions.count(Phi)) { if (Phi && Inductions.count(Phi)) {
InductionInfo II = Inductions[Phi]; InductionInfo II = Inductions[Phi];
if (IK_PtrInduction == II.IK) return II.getConsecutiveDirection();
return 1;
else if (IK_ReversePtrInduction == II.IK)
return -1;
} }
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr); GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
@ -1678,10 +1732,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0; return 0;
InductionInfo II = Inductions[Phi]; InductionInfo II = Inductions[Phi];
if (IK_PtrInduction == II.IK) return II.getConsecutiveDirection();
return 1;
else if (IK_ReversePtrInduction == II.IK)
return -1;
} }
unsigned InductionOperand = getGEPInductionOperand(DL, Gep); unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
@ -2496,33 +2547,13 @@ void InnerLoopVectorizer::createEmptyLoop() {
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(), II.StartValue->getType(),
"cast.crd"); "cast.crd");
EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end"); EndValue = II.transform(BypassBuilder, CRD);
break; EndValue->setName("ind.end");
}
case LoopVectorizationLegality::IK_ReverseIntInduction: {
// Convert the CountRoundDown variable to the PHI size.
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(),
"cast.crd");
// Handle reverse integer induction counter.
EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
break; break;
} }
case LoopVectorizationLegality::IK_PtrInduction: { case LoopVectorizationLegality::IK_PtrInduction: {
// For pointer induction variables, calculate the offset using EndValue = II.transform(BypassBuilder, CountRoundDown);
// the end index. EndValue->setName("ptr.ind.end");
EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
"ptr.ind.end");
break;
}
case LoopVectorizationLegality::IK_ReversePtrInduction: {
// The value at the end of the loop for the reverse pointer is calculated
// by creating a GEP with a negative index starting from the start value.
Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
"rev.ind.end");
EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
"rev.ptr.ind.end");
break; break;
} }
}// end of case }// end of case
@ -3137,6 +3168,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
LoopVectorizationLegality::InductionInfo II = LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(P); Legal->getInductionVars()->lookup(P);
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
switch (II.IK) { switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction: case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction"); llvm_unreachable("Unknown induction");
@ -3154,80 +3187,42 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
"normalized.idx"); "normalized.idx");
NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, Broadcasted = II.transform(Builder, NormalizedIdx);
"offset.idx"); Broadcasted->setName("offset.idx");
} }
Broadcasted = getBroadcastInstrs(Broadcasted); Broadcasted = getBroadcastInstrs(Broadcasted);
// After broadcasting the induction variable we need to make the vector // After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc. // consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part) for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue);
return; return;
} }
case LoopVectorizationLegality::IK_ReverseIntInduction:
case LoopVectorizationLegality::IK_PtrInduction: case LoopVectorizationLegality::IK_PtrInduction:
case LoopVectorizationLegality::IK_ReversePtrInduction:
// Handle reverse integer and pointer inductions.
Value *StartIdx = ExtendedIdx;
// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
"normalized.idx");
// Handle the reverse integer induction variable case.
if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
"resize.norm.idx");
Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
"reverse.idx");
// This is a new value so do not hoist it out.
Value *Broadcasted = getBroadcastInstrs(ReverseInd);
// After broadcasting the induction variable we need to make the
// vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
true);
return;
}
// Handle the pointer induction variable case. // Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type."); assert(P->getType()->isPointerTy() && "Unexpected type.");
// This is the normalized GEP that starts counting at zero.
// Is this a reverse induction ptr or a consecutive induction ptr. Value *NormalizedIdx =
bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
II.IK);
// This is the vector of results. Notice that we don't generate // This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code. // vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) { for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) { if (VF == 1) {
int EltIndex = (part) * (Reverse ? -1 : 1); int EltIndex = part;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
Value *GlobalIdx; Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
if (Reverse) Value *SclrGep = II.transform(Builder, GlobalIdx);
GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); SclrGep->setName("next.gep");
else
GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
"next.gep");
Entry[part] = SclrGep; Entry[part] = SclrGep;
continue; continue;
} }
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) { for (unsigned int i = 0; i < VF; ++i) {
int EltIndex = (i + part * VF) * (Reverse ? -1 : 1); int EltIndex = i + part * VF;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
Value *GlobalIdx; Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
if (!Reverse) Value *SclrGep = II.transform(Builder, GlobalIdx);
GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); SclrGep->setName("next.gep");
else
GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
"next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep, VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
Builder.getInt32(i), Builder.getInt32(i),
"insert.gep"); "insert.gep");
@ -3247,7 +3242,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Nothing to do for PHIs and BR, since we already took care of the // Nothing to do for PHIs and BR, since we already took care of the
// loop control flow instructions. // loop control flow instructions.
continue; continue;
case Instruction::PHI:{ case Instruction::PHI: {
// Vectorize PHINodes. // Vectorize PHINodes.
widenPHIInstruction(it, Entry, UF, VF, PV); widenPHIInstruction(it, Entry, UF, VF, PV);
continue; continue;
@ -3368,8 +3363,12 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType()); CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast); Value *Broadcasted = getBroadcastInstrs(ScalarCast);
LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(OldInduction);
Constant *Step =
ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue());
for (unsigned Part = 0; Part < UF; ++Part) for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false); Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
propagateMetadata(Entry, it); propagateMetadata(Entry, it);
break; break;
} }
@ -3716,8 +3715,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// This is the value coming from the preheader. // This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
ConstantInt *StepValue = nullptr;
// Check if this is an induction variable. // Check if this is an induction variable.
InductionKind IK = isInductionVariable(Phi); InductionKind IK = isInductionVariable(Phi, StepValue);
if (IK_NoInduction != IK) { if (IK_NoInduction != IK) {
// Get the widest type. // Get the widest type.
@ -3727,7 +3727,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy); WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV. // Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) { if (IK == IK_IntInduction && StepValue->isOne()) {
// Use the phi node with the widest type as induction. Use the last // Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other // one if there are multiple (no good reason for doing this other
// than it is expedient). // than it is expedient).
@ -3736,7 +3736,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
} }
DEBUG(dbgs() << "LV: Found an induction variable.\n"); DEBUG(dbgs() << "LV: Found an induction variable.\n");
Inductions[Phi] = InductionInfo(StartValue, IK); Inductions[Phi] = InductionInfo(StartValue, IK, StepValue);
// Until we explicitly handle the case of an induction variable with // Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop. // an outside loop user we have to give up vectorizing this loop.
@ -5287,7 +5287,8 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
} }
LoopVectorizationLegality::InductionKind LoopVectorizationLegality::InductionKind
LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
ConstantInt *&StepValue) {
Type *PhiTy = Phi->getType(); Type *PhiTy = Phi->getType();
// We only handle integer and pointer inductions variables. // We only handle integer and pointer inductions variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@ -5300,22 +5301,19 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return IK_NoInduction; return IK_NoInduction;
} }
const SCEV *Step = AR->getStepRecurrence(*SE); const SCEV *Step = AR->getStepRecurrence(*SE);
// Integer inductions need to have a stride of one.
if (PhiTy->isIntegerTy()) {
if (Step->isOne())
return IK_IntInduction;
if (Step->isAllOnesValue())
return IK_ReverseIntInduction;
return IK_NoInduction;
}
// Calculate the pointer stride and check if it is consecutive. // Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) if (!C)
return IK_NoInduction; return IK_NoInduction;
ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
StepValue = CV;
return IK_IntInduction;
}
assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
Type *PointerElementType = PhiTy->getPointerElementType(); Type *PointerElementType = PhiTy->getPointerElementType();
// The pointer stride cannot be determined if the pointer element type is not // The pointer stride cannot be determined if the pointer element type is not
@ -5323,13 +5321,12 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
if (!PointerElementType->isSized()) if (!PointerElementType->isSized())
return IK_NoInduction; return IK_NoInduction;
uint64_t Size = DL->getTypeAllocSize(PointerElementType); int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType));
if (C->getValue()->equalsInt(Size)) int64_t CVSize = CV->getSExtValue();
return IK_PtrInduction; if (CVSize % Size)
else if (C->getValue()->equalsInt(0 - Size)) return IK_NoInduction;
return IK_ReversePtrInduction; StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
return IK_PtrInduction;
return IK_NoInduction;
} }
bool LoopVectorizationLegality::isInductionVariable(const Value *V) { bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
@ -6311,11 +6308,10 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
return V; return V;
} }
Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx, Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
bool Negate) {
// When unrolling and the VF is 1, we only need to add a simple scalar. // When unrolling and the VF is 1, we only need to add a simple scalar.
Type *ITy = Val->getType(); Type *ITy = Val->getType();
assert(!ITy->isVectorTy() && "Val must be a scalar"); assert(!ITy->isVectorTy() && "Val must be a scalar");
Constant *C = ConstantInt::get(ITy, StartIdx, Negate); Constant *C = ConstantInt::get(ITy, StartIdx);
return Builder.CreateAdd(Val, C, "induction"); return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
} }

View File

@ -0,0 +1,150 @@
; RUN: opt -S < %s -loop-vectorize 2>&1 | FileCheck %s
; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"
; Test integer induction variable of step 2:
; for (int i = 0; i < 1024; i+=2) {
; int tmp = *A++;
; sum += i * tmp;
; }
; The loop body executes 512 times (i = 0, 2, ..., 1022). The default run
; expects 4-wide vectors with two wide loads per vector iteration, so the
; vector index advances by 8. The forced run (width 2, interleave 1) expects
; one 2-wide load and an index that advances by 2. Both compare the index
; against the trip count of 512.
; CHECK-LABEL: @ind_plus2(
; CHECK: load <4 x i32>*
; CHECK: load <4 x i32>*
; CHECK: mul nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: %index.next = add i64 %index, 8
; CHECK: icmp eq i64 %index.next, 512
; FORCE-VEC-LABEL: @ind_plus2(
; FORCE-VEC: %wide.load = load <2 x i32>*
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
; FORCE-VEC: icmp eq i64 %index.next, 512
define i32 @ind_plus2(i32* %A) {
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
; %A.addr is a pointer induction that advances by one i32 per iteration,
; %i is the integer induction with step 2, and %sum carries the reduction.
%A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
%i = phi i32 [ 0, %entry ], [ %add1, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
%0 = load i32* %A.addr, align 4
%mul = mul nsw i32 %0, %i
%add = add nsw i32 %mul, %sum
%add1 = add nsw i32 %i, 2
; The exit test uses the already-incremented value (%i + 2 < 1024).
%cmp = icmp slt i32 %add1, 1024
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
}
; Test integer induction variable of step -2:
; for (int i = 1024; i > 0; i-=2) {
; int tmp = *A++;
; sum += i * tmp;
; }
; The loop body executes 512 times (i = 1024, 1022, ..., 2). The expected
; output has the same shape as for @ind_plus2: 4-wide vectors interleaved by
; two in the default run (index advances by 8), and 2-wide vectors without
; interleaving in the forced run (index advances by 2), each compared against
; the trip count of 512.
; CHECK-LABEL: @ind_minus2(
; CHECK: load <4 x i32>*
; CHECK: load <4 x i32>*
; CHECK: mul nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: %index.next = add i64 %index, 8
; CHECK: icmp eq i64 %index.next, 512
; FORCE-VEC-LABEL: @ind_minus2(
; FORCE-VEC: %wide.load = load <2 x i32>*
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
; FORCE-VEC: icmp eq i64 %index.next, 512
define i32 @ind_minus2(i32* %A) {
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
; %A.addr is a pointer induction that advances by one i32 per iteration,
; %i is the integer induction with step -2, and %sum carries the reduction.
%A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
%i = phi i32 [ 1024, %entry ], [ %sub, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
%0 = load i32* %A.addr, align 4
%mul = mul nsw i32 %0, %i
%add = add nsw i32 %mul, %sum
%sub = add nsw i32 %i, -2
; The exit test uses the pre-decrement value: %i > 2 here corresponds to
; the "i > 0" test on the decremented value in the pseudo-code above.
%cmp = icmp sgt i32 %i, 2
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
}
; Test pointer induction variable of step 2. Because masked load/store is not
; currently supported, vectorization is possible but not beneficial. Unless
; vectorization is forced, LV only interleaves the loop.
; for (int i = 0; i < 1024; i++) {
; int tmp0 = *A++;
; int tmp1 = *A++;
; sum += tmp0 * tmp1;
; }
; The loop body executes 1024 times and the pointer advances by two i32
; elements per iteration. The default run therefore expects scalar loads
; interleaved by two (index advances by 2); the forced run expects scalar
; loads gathered into 2-wide vectors with insertelement.
; The final compare is matched without its result name: it is an unnamed,
; auto-numbered value, and pinning the number would make the test depend on
; how many unnamed values happen to precede it.
; CHECK-LABEL: @ptr_ind_plus2(
; CHECK: load i32*
; CHECK: load i32*
; CHECK: load i32*
; CHECK: load i32*
; CHECK: mul nsw i32
; CHECK: mul nsw i32
; CHECK: add nsw i32
; CHECK: add nsw i32
; CHECK: %index.next = add i64 %index, 2
; CHECK: icmp eq i64 %index.next, 1024
; FORCE-VEC-LABEL: @ptr_ind_plus2(
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
; FORCE-VEC: icmp eq i64 %index.next, 1024
define i32 @ptr_ind_plus2(i32* %A) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
; %A.addr is the pointer induction with step 2 (see %inc.ptr1), %sum carries
; the reduction, and %i is the unit-step loop counter.
%A.addr = phi i32* [ %A, %entry ], [ %inc.ptr1, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
%0 = load i32* %A.addr, align 4
%inc.ptr1 = getelementptr inbounds i32* %A.addr, i64 2
%1 = load i32* %inc.ptr, align 4
%mul = mul nsw i32 %1, %0
%add = add nsw i32 %mul, %sum
%inc = add nsw i32 %i, 1
%exitcond = icmp eq i32 %inc, 1024
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
}

View File

@ -388,9 +388,8 @@ define void @example12() nounwind uwtable ssp {
ret void ret void
} }
; Can't vectorize because of reductions.
;CHECK-LABEL: @example13( ;CHECK-LABEL: @example13(
;CHECK-NOT: <4 x i32> ;CHECK: <4 x i32>
;CHECK: ret void ;CHECK: ret void
define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp { define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
br label %.preheader br label %.preheader

View File

@ -97,7 +97,7 @@ loopend:
; CHECK: vector.body ; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %normalized.idx = sub i64 %index, 0 ; CHECK: %normalized.idx = sub i64 %index, 0
; CHECK: %reverse.idx = sub i64 1023, %normalized.idx ; CHECK: %offset.idx = sub i64 1023, %normalized.idx
; CHECK: trunc i64 %index to i8 ; CHECK: trunc i64 %index to i8
define void @reverse_forward_induction_i64_i8() { define void @reverse_forward_induction_i64_i8() {
@ -124,7 +124,7 @@ while.end:
; CHECK: vector.body: ; CHECK: vector.body:
; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %normalized.idx = sub i64 %index, 129 ; CHECK: %normalized.idx = sub i64 %index, 129
; CHECK: %reverse.idx = sub i64 1023, %normalized.idx ; CHECK: %offset.idx = sub i64 1023, %normalized.idx
; CHECK: trunc i64 %index to i8 ; CHECK: trunc i64 %index to i8
define void @reverse_forward_induction_i64_i8_signed() { define void @reverse_forward_induction_i64_i8_signed() {