mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
Revert "LoopVectorize: Use the dependence test utility class"
This reverts commit cbfa1ca993.
We are seeing a stage2 and stage3 miscompare on some dragonegg bots.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184690 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
daee0b0def
commit
ec677e2a64
@ -54,6 +54,7 @@
|
|||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/ADT/StringExtras.h"
|
#include "llvm/ADT/StringExtras.h"
|
||||||
#include "llvm/Analysis/AliasAnalysis.h"
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
|
#include "llvm/Analysis/AliasSetTracker.h"
|
||||||
#include "llvm/Analysis/Dominators.h"
|
#include "llvm/Analysis/Dominators.h"
|
||||||
#include "llvm/Analysis/LoopInfo.h"
|
#include "llvm/Analysis/LoopInfo.h"
|
||||||
#include "llvm/Analysis/LoopIterator.h"
|
#include "llvm/Analysis/LoopIterator.h"
|
||||||
@ -408,10 +409,11 @@ bool LoadHoisting::canHoistAllLoads() {
|
|||||||
class LoopVectorizationLegality {
|
class LoopVectorizationLegality {
|
||||||
public:
|
public:
|
||||||
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
|
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
|
||||||
DominatorTree *DT, TargetLibraryInfo *TLI)
|
DominatorTree *DT, TargetTransformInfo* TTI,
|
||||||
: TheLoop(L), SE(SE), DL(DL), DT(DT), TLI(TLI),
|
AliasAnalysis *AA, TargetLibraryInfo *TLI)
|
||||||
|
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
|
||||||
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
|
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
|
||||||
MaxSafeDepDistBytes(-1U), LoadSpeculation(L, DT) {}
|
LoadSpeculation(L, DT) {}
|
||||||
|
|
||||||
/// This enum represents the kinds of reductions that we support.
|
/// This enum represents the kinds of reductions that we support.
|
||||||
enum ReductionKind {
|
enum ReductionKind {
|
||||||
@ -498,8 +500,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Insert a pointer and calculate the start and end SCEVs.
|
/// Insert a pointer and calculate the start and end SCEVs.
|
||||||
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
|
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr);
|
||||||
unsigned DepSetId);
|
|
||||||
|
|
||||||
/// This flag indicates if we need to add the runtime check.
|
/// This flag indicates if we need to add the runtime check.
|
||||||
bool Need;
|
bool Need;
|
||||||
@ -511,9 +512,6 @@ public:
|
|||||||
SmallVector<const SCEV*, 2> Ends;
|
SmallVector<const SCEV*, 2> Ends;
|
||||||
/// Holds the information if this pointer is used for writing to memory.
|
/// Holds the information if this pointer is used for writing to memory.
|
||||||
SmallVector<bool, 2> IsWritePtr;
|
SmallVector<bool, 2> IsWritePtr;
|
||||||
/// Holds the id of the set of pointers that could be dependent because of a
|
|
||||||
/// shared underlying object.
|
|
||||||
SmallVector<unsigned, 2> DependencySetId;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A POD for saving information about induction variables.
|
/// A POD for saving information about induction variables.
|
||||||
@ -534,6 +532,11 @@ public:
|
|||||||
/// induction descriptor.
|
/// induction descriptor.
|
||||||
typedef MapVector<PHINode*, InductionInfo> InductionList;
|
typedef MapVector<PHINode*, InductionInfo> InductionList;
|
||||||
|
|
||||||
|
/// Alias(Multi)Map stores the values (GEPs or underlying objects and their
|
||||||
|
/// respective Store/Load instruction(s) to calculate aliasing.
|
||||||
|
typedef MapVector<Value*, Instruction* > AliasMap;
|
||||||
|
typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
|
||||||
|
|
||||||
/// Returns true if it is legal to vectorize this loop.
|
/// Returns true if it is legal to vectorize this loop.
|
||||||
/// This does not mean that it is profitable to vectorize this
|
/// This does not mean that it is profitable to vectorize this
|
||||||
/// loop, only that it is legal to do so.
|
/// loop, only that it is legal to do so.
|
||||||
@ -580,9 +583,6 @@ public:
|
|||||||
/// This function returns the identity element (or neutral element) for
|
/// This function returns the identity element (or neutral element) for
|
||||||
/// the operation K.
|
/// the operation K.
|
||||||
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
|
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
|
||||||
|
|
||||||
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Check if a single basic block loop is vectorizable.
|
/// Check if a single basic block loop is vectorizable.
|
||||||
/// At this point we know that this is a loop with a constant trip count
|
/// At this point we know that this is a loop with a constant trip count
|
||||||
@ -623,6 +623,16 @@ private:
|
|||||||
/// Returns the induction kind of Phi. This function may return NoInduction
|
/// Returns the induction kind of Phi. This function may return NoInduction
|
||||||
/// if the PHI is not an induction variable.
|
/// if the PHI is not an induction variable.
|
||||||
InductionKind isInductionVariable(PHINode *Phi);
|
InductionKind isInductionVariable(PHINode *Phi);
|
||||||
|
/// Return true if can compute the address bounds of Ptr within the loop.
|
||||||
|
bool hasComputableBounds(Value *Ptr);
|
||||||
|
/// Return true if there is the chance of write reorder.
|
||||||
|
bool hasPossibleGlobalWriteReorder(Value *Object,
|
||||||
|
Instruction *Inst,
|
||||||
|
AliasMultiMap &WriteObjects,
|
||||||
|
unsigned MaxByteWidth);
|
||||||
|
/// Return the AA location for a load or a store.
|
||||||
|
AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
|
||||||
|
|
||||||
|
|
||||||
/// The loop that we evaluate.
|
/// The loop that we evaluate.
|
||||||
Loop *TheLoop;
|
Loop *TheLoop;
|
||||||
@ -632,6 +642,10 @@ private:
|
|||||||
DataLayout *DL;
|
DataLayout *DL;
|
||||||
/// Dominators.
|
/// Dominators.
|
||||||
DominatorTree *DT;
|
DominatorTree *DT;
|
||||||
|
/// Target Info.
|
||||||
|
TargetTransformInfo *TTI;
|
||||||
|
/// Alias Analysis.
|
||||||
|
AliasAnalysis *AA;
|
||||||
/// Target Library Info.
|
/// Target Library Info.
|
||||||
TargetLibraryInfo *TLI;
|
TargetLibraryInfo *TLI;
|
||||||
|
|
||||||
@ -661,8 +675,6 @@ private:
|
|||||||
/// Can we assume the absence of NaNs.
|
/// Can we assume the absence of NaNs.
|
||||||
bool HasFunNoNaNAttr;
|
bool HasFunNoNaNAttr;
|
||||||
|
|
||||||
unsigned MaxSafeDepDistBytes;
|
|
||||||
|
|
||||||
/// Utility to determine whether loads can be speculated.
|
/// Utility to determine whether loads can be speculated.
|
||||||
LoadHoisting LoadSpeculation;
|
LoadHoisting LoadSpeculation;
|
||||||
};
|
};
|
||||||
@ -891,6 +903,7 @@ struct LoopVectorize : public LoopPass {
|
|||||||
LoopInfo *LI;
|
LoopInfo *LI;
|
||||||
TargetTransformInfo *TTI;
|
TargetTransformInfo *TTI;
|
||||||
DominatorTree *DT;
|
DominatorTree *DT;
|
||||||
|
AliasAnalysis *AA;
|
||||||
TargetLibraryInfo *TLI;
|
TargetLibraryInfo *TLI;
|
||||||
|
|
||||||
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
|
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||||
@ -903,6 +916,7 @@ struct LoopVectorize : public LoopPass {
|
|||||||
LI = &getAnalysis<LoopInfo>();
|
LI = &getAnalysis<LoopInfo>();
|
||||||
TTI = &getAnalysis<TargetTransformInfo>();
|
TTI = &getAnalysis<TargetTransformInfo>();
|
||||||
DT = &getAnalysis<DominatorTree>();
|
DT = &getAnalysis<DominatorTree>();
|
||||||
|
AA = getAnalysisIfAvailable<AliasAnalysis>();
|
||||||
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
|
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
|
||||||
|
|
||||||
if (DL == NULL) {
|
if (DL == NULL) {
|
||||||
@ -921,7 +935,7 @@ struct LoopVectorize : public LoopPass {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if it is legal to vectorize the loop.
|
// Check if it is legal to vectorize the loop.
|
||||||
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
|
LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
|
||||||
if (!LVL.canVectorize()) {
|
if (!LVL.canVectorize()) {
|
||||||
DEBUG(dbgs() << "LV: Not vectorizing.\n");
|
DEBUG(dbgs() << "LV: Not vectorizing.\n");
|
||||||
return false;
|
return false;
|
||||||
@ -996,8 +1010,7 @@ struct LoopVectorize : public LoopPass {
|
|||||||
void
|
void
|
||||||
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
|
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
|
||||||
Loop *Lp, Value *Ptr,
|
Loop *Lp, Value *Ptr,
|
||||||
bool WritePtr,
|
bool WritePtr) {
|
||||||
unsigned DepSetId) {
|
|
||||||
const SCEV *Sc = SE->getSCEV(Ptr);
|
const SCEV *Sc = SE->getSCEV(Ptr);
|
||||||
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
|
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
|
||||||
assert(AR && "Invalid addrec expression");
|
assert(AR && "Invalid addrec expression");
|
||||||
@ -1007,7 +1020,6 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
|
|||||||
Starts.push_back(AR->getStart());
|
Starts.push_back(AR->getStart());
|
||||||
Ends.push_back(ScEnd);
|
Ends.push_back(ScEnd);
|
||||||
IsWritePtr.push_back(WritePtr);
|
IsWritePtr.push_back(WritePtr);
|
||||||
DependencySetId.push_back(DepSetId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
|
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
|
||||||
@ -1345,9 +1357,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
|||||||
if (!PtrRtCheck->Need)
|
if (!PtrRtCheck->Need)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
Instruction *MemoryRuntimeCheck = 0;
|
||||||
unsigned NumPointers = PtrRtCheck->Pointers.size();
|
unsigned NumPointers = PtrRtCheck->Pointers.size();
|
||||||
SmallVector<TrackingVH<Value> , 2> Starts;
|
SmallVector<Value* , 2> Starts;
|
||||||
SmallVector<TrackingVH<Value> , 2> Ends;
|
SmallVector<Value* , 2> Ends;
|
||||||
|
|
||||||
SCEVExpander Exp(*SE, "induction");
|
SCEVExpander Exp(*SE, "induction");
|
||||||
|
|
||||||
@ -1374,18 +1387,13 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
|||||||
}
|
}
|
||||||
|
|
||||||
IRBuilder<> ChkBuilder(Loc);
|
IRBuilder<> ChkBuilder(Loc);
|
||||||
// Our instructions might fold to a constant.
|
|
||||||
Value *MemoryRuntimeCheck = 0;
|
|
||||||
for (unsigned i = 0; i < NumPointers; ++i) {
|
for (unsigned i = 0; i < NumPointers; ++i) {
|
||||||
for (unsigned j = i+1; j < NumPointers; ++j) {
|
for (unsigned j = i+1; j < NumPointers; ++j) {
|
||||||
// No need to check if two readonly pointers intersect.
|
// No need to check if two readonly pointers intersect.
|
||||||
if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
|
if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Only need to check pointers between two different dependency sets.
|
|
||||||
if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
|
Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
|
||||||
Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
|
Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
|
||||||
Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
|
Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
|
||||||
@ -1397,18 +1405,12 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
|
|||||||
if (MemoryRuntimeCheck)
|
if (MemoryRuntimeCheck)
|
||||||
IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
|
IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
|
||||||
"conflict.rdx");
|
"conflict.rdx");
|
||||||
MemoryRuntimeCheck = IsConflict;
|
|
||||||
|
MemoryRuntimeCheck = cast<Instruction>(IsConflict);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We have to do this trickery because the IRBuilder might fold the check to a
|
return MemoryRuntimeCheck;
|
||||||
// constant expression in which case there is no Instruction anchored in a
|
|
||||||
// the block.
|
|
||||||
LLVMContext &Ctx = Loc->getContext();
|
|
||||||
Instruction * Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
|
|
||||||
ConstantInt::getTrue(Ctx));
|
|
||||||
ChkBuilder.Insert(Check, "memcheck.conflict");
|
|
||||||
return Check;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -2979,7 +2981,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
|
|||||||
// Each access has its own dependence set.
|
// Each access has its own dependence set.
|
||||||
DepId = RunningDepId++;
|
DepId = RunningDepId++;
|
||||||
|
|
||||||
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
|
//RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
|
||||||
|
|
||||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr <<"\n");
|
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr <<"\n");
|
||||||
} else {
|
} else {
|
||||||
@ -3461,29 +3463,53 @@ MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build the alias-analysis location describing the memory touched by
/// \p Inst, which must be a store or a load. Any other instruction kind
/// reaches llvm_unreachable.
AliasAnalysis::Location
LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
  StoreInst *AsStore = dyn_cast<StoreInst>(Inst);
  if (AsStore)
    return AA->getLocation(AsStore);

  LoadInst *AsLoad = dyn_cast<LoadInst>(Inst);
  if (AsLoad)
    return AA->getLocation(AsLoad);

  llvm_unreachable("Should be either load or store instruction");
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
|
||||||
|
Value *Object,
|
||||||
|
Instruction *Inst,
|
||||||
|
AliasMultiMap& WriteObjects,
|
||||||
|
unsigned MaxByteWidth) {
|
||||||
|
|
||||||
|
AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst);
|
||||||
|
|
||||||
|
std::vector<Instruction*>::iterator
|
||||||
|
it = WriteObjects[Object].begin(),
|
||||||
|
end = WriteObjects[Object].end();
|
||||||
|
|
||||||
|
for (; it != end; ++it) {
|
||||||
|
Instruction* I = *it;
|
||||||
|
if (I == Inst)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I);
|
||||||
|
if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth),
|
||||||
|
ThatLoc.getWithNewSize(MaxByteWidth)))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool LoopVectorizationLegality::canVectorizeMemory() {
|
bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||||
|
|
||||||
typedef SmallVector<Value*, 16> ValueVector;
|
typedef SmallVector<Value*, 16> ValueVector;
|
||||||
typedef SmallPtrSet<Value*, 16> ValueSet;
|
typedef SmallPtrSet<Value*, 16> ValueSet;
|
||||||
|
|
||||||
// Stores a pair of memory access location and whether the access is a store
|
|
||||||
// (true) or a load (false).
|
|
||||||
typedef std::pair<Value*, char> MemAccessInfo;
|
|
||||||
typedef DenseSet<MemAccessInfo> PtrAccessSet;
|
|
||||||
|
|
||||||
// Holds the Load and Store *instructions*.
|
// Holds the Load and Store *instructions*.
|
||||||
ValueVector Loads;
|
ValueVector Loads;
|
||||||
ValueVector Stores;
|
ValueVector Stores;
|
||||||
|
|
||||||
// Holds all the different accesses in the loop.
|
|
||||||
unsigned NumReads = 0;
|
|
||||||
unsigned NumReadWrites = 0;
|
|
||||||
|
|
||||||
PtrRtCheck.Pointers.clear();
|
PtrRtCheck.Pointers.clear();
|
||||||
PtrRtCheck.Need = false;
|
PtrRtCheck.Need = false;
|
||||||
|
|
||||||
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
|
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
|
||||||
MemoryDepChecker DepChecker(SE, DL, TheLoop);
|
|
||||||
|
|
||||||
// For each block.
|
// For each block.
|
||||||
for (Loop::block_iterator bb = TheLoop->block_begin(),
|
for (Loop::block_iterator bb = TheLoop->block_begin(),
|
||||||
@ -3504,7 +3530,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
Loads.push_back(Ld);
|
Loads.push_back(Ld);
|
||||||
DepChecker.addAccess(Ld);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3517,7 +3542,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
Stores.push_back(St);
|
Stores.push_back(St);
|
||||||
DepChecker.addAccess(St);
|
|
||||||
}
|
}
|
||||||
} // next instr.
|
} // next instr.
|
||||||
} // next block.
|
} // next block.
|
||||||
@ -3532,8 +3556,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
AccessAnalysis::DepCandidates DependentAccesses;
|
// Holds the read and read-write *pointers* that we find. These maps hold
|
||||||
AccessAnalysis Accesses(DL, DependentAccesses);
|
// unique values for pointers (so no need for multi-map).
|
||||||
|
AliasMap Reads;
|
||||||
|
AliasMap ReadWrites;
|
||||||
|
|
||||||
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
|
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
|
||||||
// multiple times on the same object. If the ptr is accessed twice, once
|
// multiple times on the same object. If the ptr is accessed twice, once
|
||||||
@ -3552,12 +3578,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we did *not* see this pointer before, insert it to the read-write
|
// If we did *not* see this pointer before, insert it to
|
||||||
// list. At this phase it is only a 'write' list.
|
// the read-write list. At this phase it is only a 'write' list.
|
||||||
if (Seen.insert(Ptr)) {
|
if (Seen.insert(Ptr))
|
||||||
++NumReadWrites;
|
ReadWrites.insert(std::make_pair(Ptr, ST));
|
||||||
Accesses.addStore(Ptr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsAnnotatedParallel) {
|
if (IsAnnotatedParallel) {
|
||||||
@ -3567,7 +3591,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
SmallPtrSet<Value *, 16> ReadOnlyPtr;
|
|
||||||
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
|
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
|
||||||
LoadInst *LD = cast<LoadInst>(*I);
|
LoadInst *LD = cast<LoadInst>(*I);
|
||||||
Value* Ptr = LD->getPointerOperand();
|
Value* Ptr = LD->getPointerOperand();
|
||||||
@ -3579,44 +3602,51 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
// If the address of i is unknown (for example A[B[i]]) then we may
|
// If the address of i is unknown (for example A[B[i]]) then we may
|
||||||
// read a few words, modify, and write a few words, and some of the
|
// read a few words, modify, and write a few words, and some of the
|
||||||
// words may be written to the same address.
|
// words may be written to the same address.
|
||||||
bool IsReadOnlyPtr = false;
|
if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
|
||||||
if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop)) {
|
Reads.insert(std::make_pair(Ptr, LD));
|
||||||
++NumReads;
|
|
||||||
IsReadOnlyPtr = true;
|
|
||||||
}
|
|
||||||
Accesses.addLoad(Ptr, IsReadOnlyPtr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we write (or read-write) to a single destination and there are no
|
// If we write (or read-write) to a single destination and there are no
|
||||||
// other reads in this loop then is it safe to vectorize.
|
// other reads in this loop then is it safe to vectorize.
|
||||||
if (NumReadWrites == 1 && NumReads == 0) {
|
if (ReadWrites.size() == 1 && Reads.size() == 0) {
|
||||||
DEBUG(dbgs() << "LV: Found a write-only loop!\n");
|
DEBUG(dbgs() << "LV: Found a write-only loop!\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build dependence sets and check whether we need a runtime pointer bounds
|
unsigned NumReadPtrs = 0;
|
||||||
// check.
|
unsigned NumWritePtrs = 0;
|
||||||
Accesses.buildDependenceSets();
|
|
||||||
bool NeedRTCheck = Accesses.isRTCheckNeeded();
|
|
||||||
|
|
||||||
// Find pointers with computable bounds. We are going to use this information
|
// Find pointers with computable bounds. We are going to use this information
|
||||||
// to place a runtime bound check.
|
// to place a runtime bound check.
|
||||||
unsigned NumComparisons = 0;
|
bool CanDoRT = true;
|
||||||
bool CanDoRT = false;
|
AliasMap::iterator MI, ME;
|
||||||
if (NeedRTCheck)
|
for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
|
||||||
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop);
|
Value *V = (*MI).first;
|
||||||
|
if (hasComputableBounds(V)) {
|
||||||
|
PtrRtCheck.insert(SE, TheLoop, V, true);
|
||||||
|
NumWritePtrs++;
|
||||||
|
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
|
||||||
|
} else {
|
||||||
|
CanDoRT = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
|
||||||
|
Value *V = (*MI).first;
|
||||||
|
if (hasComputableBounds(V)) {
|
||||||
|
PtrRtCheck.insert(SE, TheLoop, V, false);
|
||||||
|
NumReadPtrs++;
|
||||||
|
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
|
||||||
|
} else {
|
||||||
|
CanDoRT = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that we did not collect too many pointers or found a
|
||||||
DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
|
// unsizeable pointer.
|
||||||
" pointer comparisons.\n");
|
unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1));
|
||||||
|
DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n");
|
||||||
// If we only have one set of dependences to check pointers among we don't
|
|
||||||
// need a runtime check.
|
|
||||||
if (NumComparisons == 0 && NeedRTCheck)
|
|
||||||
NeedRTCheck = false;
|
|
||||||
|
|
||||||
// Check that we did not collect too many pointers or found a unsizeable
|
|
||||||
// pointer.
|
|
||||||
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
|
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
|
||||||
PtrRtCheck.reset();
|
PtrRtCheck.reset();
|
||||||
CanDoRT = false;
|
CanDoRT = false;
|
||||||
@ -3626,6 +3656,113 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
|
DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool NeedRTCheck = false;
|
||||||
|
|
||||||
|
// Biggest vectorized access possible, vector width * unroll factor.
|
||||||
|
// TODO: We're being very pessimistic here, find a way to know the
|
||||||
|
// real access width before getting here.
|
||||||
|
unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
|
||||||
|
TTI->getMaximumUnrollFactor();
|
||||||
|
// Now that the pointers are in two lists (Reads and ReadWrites), we
|
||||||
|
// can check that there are no conflicts between each of the writes and
|
||||||
|
// between the writes to the reads.
|
||||||
|
// Note that WriteObjects duplicates the stores (indexed now by underlying
|
||||||
|
// objects) to avoid pointing to elements inside ReadWrites.
|
||||||
|
// TODO: Maybe create a new type where they can interact without duplication.
|
||||||
|
AliasMultiMap WriteObjects;
|
||||||
|
ValueVector TempObjects;
|
||||||
|
|
||||||
|
// Check that the read-writes do not conflict with other read-write
|
||||||
|
// pointers.
|
||||||
|
bool AllWritesIdentified = true;
|
||||||
|
for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
|
||||||
|
Value *Val = (*MI).first;
|
||||||
|
Instruction *Inst = (*MI).second;
|
||||||
|
|
||||||
|
GetUnderlyingObjects(Val, TempObjects, DL);
|
||||||
|
for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
|
||||||
|
UI != UE; ++UI) {
|
||||||
|
if (!isIdentifiedObject(*UI)) {
|
||||||
|
DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
|
||||||
|
NeedRTCheck = true;
|
||||||
|
AllWritesIdentified = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Never seen it before, can't alias.
|
||||||
|
if (WriteObjects[*UI].empty()) {
|
||||||
|
DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
|
||||||
|
WriteObjects[*UI].push_back(Inst);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Direct alias found.
|
||||||
|
if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
|
||||||
|
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
|
||||||
|
<< **UI <<"\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
DEBUG(dbgs() << "LV: Found a conflicting global value:"
|
||||||
|
<< **UI <<"\n");
|
||||||
|
DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
|
||||||
|
DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
|
||||||
|
|
||||||
|
// If global alias, make sure they do alias.
|
||||||
|
if (hasPossibleGlobalWriteReorder(*UI,
|
||||||
|
Inst,
|
||||||
|
WriteObjects,
|
||||||
|
MaxByteWidth)) {
|
||||||
|
DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
|
||||||
|
<< "\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Didn't alias, insert into map for further reference.
|
||||||
|
WriteObjects[*UI].push_back(Inst);
|
||||||
|
}
|
||||||
|
TempObjects.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check that the reads don't conflict with the read-writes.
|
||||||
|
for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
|
||||||
|
Value *Val = (*MI).first;
|
||||||
|
GetUnderlyingObjects(Val, TempObjects, DL);
|
||||||
|
for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
|
||||||
|
UI != UE; ++UI) {
|
||||||
|
// If all of the writes are identified then we don't care if the read
|
||||||
|
// pointer is identified or not.
|
||||||
|
if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
|
||||||
|
DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
|
||||||
|
NeedRTCheck = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Never seen it before, can't alias.
|
||||||
|
if (WriteObjects[*UI].empty())
|
||||||
|
continue;
|
||||||
|
// Direct alias found.
|
||||||
|
if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
|
||||||
|
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
|
||||||
|
<< **UI <<"\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
DEBUG(dbgs() << "LV: Found a global value: "
|
||||||
|
<< **UI <<"\n");
|
||||||
|
Instruction *Inst = (*MI).second;
|
||||||
|
DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
|
||||||
|
DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
|
||||||
|
|
||||||
|
// If global alias, make sure they do alias.
|
||||||
|
if (hasPossibleGlobalWriteReorder(*UI,
|
||||||
|
Inst,
|
||||||
|
WriteObjects,
|
||||||
|
MaxByteWidth)) {
|
||||||
|
DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
|
||||||
|
<< "\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TempObjects.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
PtrRtCheck.Need = NeedRTCheck;
|
||||||
if (NeedRTCheck && !CanDoRT) {
|
if (NeedRTCheck && !CanDoRT) {
|
||||||
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
|
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
|
||||||
"the array bounds.\n");
|
"the array bounds.\n");
|
||||||
@ -3633,20 +3770,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
PtrRtCheck.Need = NeedRTCheck;
|
|
||||||
|
|
||||||
bool CanVecMem = true;
|
|
||||||
if (Accesses.isDependencyCheckNeeded()) {
|
|
||||||
DEBUG(dbgs() << "LV: Checking memory dependencies\n");
|
|
||||||
CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
|
|
||||||
Accesses.getDependenciesToCheck());
|
|
||||||
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
|
DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
|
||||||
" need a runtime memory check.\n");
|
" need a runtime memory check.\n");
|
||||||
|
return true;
|
||||||
return CanVecMem;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool hasMultipleUsesOf(Instruction *I,
|
static bool hasMultipleUsesOf(Instruction *I,
|
||||||
@ -3999,6 +4125,15 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Report whether scalar evolution models \p Ptr as an affine add-recurrence.
/// Returns false when the SCEV of \p Ptr is not an add-recurrence at all.
bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
  return AddRec != 0 && AddRec->isAffine();
}
|
||||||
|
|
||||||
LoopVectorizationCostModel::VectorizationFactor
|
LoopVectorizationCostModel::VectorizationFactor
|
||||||
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
|
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
|
||||||
unsigned UserVF) {
|
unsigned UserVF) {
|
||||||
@ -4015,10 +4150,6 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
|
|||||||
|
|
||||||
unsigned WidestType = getWidestType();
|
unsigned WidestType = getWidestType();
|
||||||
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
|
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
|
||||||
unsigned MaxSafeDepDist = -1U;
|
|
||||||
if (Legal->getMaxSafeDepDistBytes() != -1U)
|
|
||||||
MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
|
|
||||||
WidestRegister = WidestRegister < MaxSafeDepDist ? WidestRegister : MaxSafeDepDist;
|
|
||||||
unsigned MaxVectorSize = WidestRegister / WidestType;
|
unsigned MaxVectorSize = WidestRegister / WidestType;
|
||||||
DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
|
DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
|
||||||
DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
|
DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
|
||||||
@ -4152,10 +4283,6 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
|
|||||||
if (OptForSize)
|
if (OptForSize)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
// We used the distance for the unroll factor.
|
|
||||||
if (Legal->getMaxSafeDepDistBytes() != -1U)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
// Do not unroll loops with a relatively small trip count.
|
// Do not unroll loops with a relatively small trip count.
|
||||||
unsigned TC = SE->getSmallConstantTripCount(TheLoop,
|
unsigned TC = SE->getSmallConstantTripCount(TheLoop,
|
||||||
TheLoop->getLoopLatch());
|
TheLoop->getLoopLatch());
|
||||||
@ -4552,6 +4679,7 @@ Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
|
|||||||
char LoopVectorize::ID = 0;
|
char LoopVectorize::ID = 0;
|
||||||
static const char lv_name[] = "Loop Vectorization";
|
static const char lv_name[] = "Loop Vectorization";
|
||||||
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
|
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
|
||||||
|
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
|
||||||
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
|
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
|
||||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
||||||
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
||||||
|
@ -30,7 +30,7 @@ if.then: ; preds = %for.body
|
|||||||
if.end: ; preds = %for.body, %if.then
|
if.end: ; preds = %for.body, %if.then
|
||||||
%z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
|
%z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
|
||||||
store i32 %z.0, i32* %arrayidx, align 4
|
store i32 %z.0, i32* %arrayidx, align 4
|
||||||
%indvars.iv.next = add nsw i64 %indvars.iv, 1
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||||
%exitcond = icmp eq i32 %lftr.wideiv, %x
|
%exitcond = icmp eq i32 %lftr.wideiv, %x
|
||||||
br i1 %exitcond, label %for.end, label %for.body
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
@ -1,222 +0,0 @@
|
|||||||
; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s
|
|
||||||
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH
|
|
||||||
|
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
|
||||||
|
|
||||||
; Vectorization with dependence checks.
|
|
||||||
|
|
||||||
; No plausible dependence - can be vectorized.
|
|
||||||
; for (i = 0; i < 1024; ++i)
|
|
||||||
; A[i] = A[i + 1] + 1;
|
|
||||||
|
|
||||||
; CHECK: f1_vec
|
|
||||||
; CHECK: <2 x i32>
|
|
||||||
|
|
||||||
define void @f1_vec(i32* %A) {
|
|
||||||
entry:
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body:
|
|
||||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
||||||
%indvars.iv.next = add i32 %indvars.iv, 1
|
|
||||||
%arrayidx = getelementptr inbounds i32* %A, i32 %indvars.iv.next
|
|
||||||
%0 = load i32* %arrayidx, align 4
|
|
||||||
%add1 = add nsw i32 %0, 1
|
|
||||||
%arrayidx3 = getelementptr inbounds i32* %A, i32 %indvars.iv
|
|
||||||
store i32 %add1, i32* %arrayidx3, align 4
|
|
||||||
%exitcond = icmp ne i32 %indvars.iv.next, 1024
|
|
||||||
br i1 %exitcond, label %for.body, label %for.end
|
|
||||||
|
|
||||||
for.end:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Plausible dependence of distance 1 - can't be vectorized.
|
|
||||||
; for (i = 0; i < 1024; ++i)
|
|
||||||
; A[i+1] = A[i] + 1;
|
|
||||||
|
|
||||||
; CHECK: f2_novec
|
|
||||||
; CHECK-NOT: <2 x i32>
|
|
||||||
|
|
||||||
define void @f2_novec(i32* %A) {
|
|
||||||
entry:
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body:
|
|
||||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
||||||
%arrayidx = getelementptr inbounds i32* %A, i32 %indvars.iv
|
|
||||||
%0 = load i32* %arrayidx, align 4
|
|
||||||
%add = add nsw i32 %0, 1
|
|
||||||
%indvars.iv.next = add i32 %indvars.iv, 1
|
|
||||||
%arrayidx3 = getelementptr inbounds i32* %A, i32 %indvars.iv.next
|
|
||||||
store i32 %add, i32* %arrayidx3, align 4
|
|
||||||
%exitcond = icmp ne i32 %indvars.iv.next, 1024
|
|
||||||
br i1 %exitcond, label %for.body, label %for.end
|
|
||||||
|
|
||||||
for.end:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Plausible dependence of distance 2 - can be vectorized with a width of 2.
|
|
||||||
; for (i = 0; i < 1024; ++i)
|
|
||||||
; A[i+2] = A[i] + 1;
|
|
||||||
|
|
||||||
; CHECK: f3_vec_len
|
|
||||||
; CHECK: <2 x i32>
|
|
||||||
|
|
||||||
; WIDTH: f3_vec_len
|
|
||||||
; WIDTH-NOT: <4 x i32>
|
|
||||||
|
|
||||||
define void @f3_vec_len(i32* %A) {
|
|
||||||
entry:
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body:
|
|
||||||
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
||||||
%idxprom = sext i32 %i.01 to i64
|
|
||||||
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
|
|
||||||
%0 = load i32* %arrayidx, align 4
|
|
||||||
%add = add nsw i32 %0, 1
|
|
||||||
%add1 = add nsw i32 %i.01, 2
|
|
||||||
%idxprom2 = sext i32 %add1 to i64
|
|
||||||
%arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
|
|
||||||
store i32 %add, i32* %arrayidx3, align 4
|
|
||||||
%inc = add nsw i32 %i.01, 1
|
|
||||||
%cmp = icmp slt i32 %inc, 1024
|
|
||||||
br i1 %cmp, label %for.body, label %for.end
|
|
||||||
|
|
||||||
for.end:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Plausible dependence of distance 1 - cannot be vectorized (without reordering
|
|
||||||
; accesses).
|
|
||||||
; for (i = 0; i < 1024; ++i) {
|
|
||||||
; B[i] = A[i];
|
|
||||||
; A[i] = B[i + 1];
|
|
||||||
; }
|
|
||||||
|
|
||||||
; CHECK: f5
|
|
||||||
; CHECK-NOT: <2 x i32>
|
|
||||||
|
|
||||||
define void @f5(i32* %A, i32* %B) {
|
|
||||||
entry:
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body:
|
|
||||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
||||||
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
|
|
||||||
%0 = load i32* %arrayidx, align 4
|
|
||||||
%arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
|
|
||||||
store i32 %0, i32* %arrayidx2, align 4
|
|
||||||
%indvars.iv.next = add nsw i64 %indvars.iv, 1
|
|
||||||
%arrayidx4 = getelementptr inbounds i32* %B, i64 %indvars.iv.next
|
|
||||||
%1 = load i32* %arrayidx4, align 4
|
|
||||||
store i32 %1, i32* %arrayidx, align 4
|
|
||||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
||||||
%exitcond = icmp ne i32 %lftr.wideiv, 1024
|
|
||||||
br i1 %exitcond, label %for.body, label %for.end
|
|
||||||
|
|
||||||
for.end:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Dependence through a phi node - must not vectorize.
|
|
||||||
; for (i = 0; i < 1024; ++i) {
|
|
||||||
; a[i+1] = tmp;
|
|
||||||
; tmp = a[i];
|
|
||||||
; }
|
|
||||||
|
|
||||||
; CHECK: f6
|
|
||||||
; CHECK-NOT: <2 x i32>
|
|
||||||
|
|
||||||
define i32 @f6(i32* %a, i32 %tmp) {
|
|
||||||
entry:
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body:
|
|
||||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
||||||
%tmp.addr.08 = phi i32 [ %tmp, %entry ], [ %0, %for.body ]
|
|
||||||
%indvars.iv.next = add nsw i64 %indvars.iv, 1
|
|
||||||
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv.next
|
|
||||||
store i32 %tmp.addr.08, i32* %arrayidx, align 4
|
|
||||||
%arrayidx3 = getelementptr inbounds i32* %a, i64 %indvars.iv
|
|
||||||
%0 = load i32* %arrayidx3, align 4
|
|
||||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
||||||
%exitcond = icmp ne i32 %lftr.wideiv, 1024
|
|
||||||
br i1 %exitcond, label %for.body, label %for.end
|
|
||||||
|
|
||||||
for.end:
|
|
||||||
ret i32 undef
|
|
||||||
}
|
|
||||||
|
|
||||||
; Don't vectorize true loop carried dependencies that are not a multiple of the
|
|
||||||
; vector width.
|
|
||||||
; Example:
|
|
||||||
; for (int i = ...; ++i) {
|
|
||||||
; a[i] = a[i-3] + ...;
|
|
||||||
; It is a bad idea to vectorize this loop because store-load forwarding will not
|
|
||||||
; happen.
|
|
||||||
;
|
|
||||||
|
|
||||||
; CHECK: @nostoreloadforward
|
|
||||||
; CHECK-NOT: <2 x i32>
|
|
||||||
|
|
||||||
define void @nostoreloadforward(i32* %A) {
|
|
||||||
entry:
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body:
|
|
||||||
%indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
|
|
||||||
%0 = add nsw i64 %indvars.iv, -3
|
|
||||||
%arrayidx = getelementptr inbounds i32* %A, i64 %0
|
|
||||||
%1 = load i32* %arrayidx, align 4
|
|
||||||
%2 = add nsw i64 %indvars.iv, 4
|
|
||||||
%arrayidx2 = getelementptr inbounds i32* %A, i64 %2
|
|
||||||
%3 = load i32* %arrayidx2, align 4
|
|
||||||
%add3 = add nsw i32 %3, %1
|
|
||||||
%arrayidx5 = getelementptr inbounds i32* %A, i64 %indvars.iv
|
|
||||||
store i32 %add3, i32* %arrayidx5, align 4
|
|
||||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
||||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
||||||
%exitcond = icmp ne i32 %lftr.wideiv, 128
|
|
||||||
br i1 %exitcond, label %for.body, label %for.end
|
|
||||||
|
|
||||||
for.end:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Example:
|
|
||||||
; for (int i = ...; ++i) {
|
|
||||||
; a[i] = b[i];
|
|
||||||
; c[i] = a[i-3] + ...;
|
|
||||||
; It is a bad idea to vectorize this loop because store-load forwarding will not
|
|
||||||
; happen.
|
|
||||||
;
|
|
||||||
|
|
||||||
; CHECK: @nostoreloadforward2
|
|
||||||
; CHECK-NOT: <2 x i32>
|
|
||||||
|
|
||||||
define void @nostoreloadforward2(i32* noalias %A, i32* noalias %B, i32* noalias %C) {
|
|
||||||
entry:
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body:
|
|
||||||
%indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
|
|
||||||
%arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv
|
|
||||||
%0 = load i32* %arrayidx, align 4
|
|
||||||
%arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
|
|
||||||
store i32 %0, i32* %arrayidx2, align 4
|
|
||||||
%1 = add nsw i64 %indvars.iv, -3
|
|
||||||
%arrayidx4 = getelementptr inbounds i32* %A, i64 %1
|
|
||||||
%2 = load i32* %arrayidx4, align 4
|
|
||||||
%arrayidx6 = getelementptr inbounds i32* %C, i64 %indvars.iv
|
|
||||||
store i32 %2, i32* %arrayidx6, align 4
|
|
||||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
||||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
||||||
%exitcond = icmp ne i32 %lftr.wideiv, 128
|
|
||||||
br i1 %exitcond, label %for.body, label %for.end
|
|
||||||
|
|
||||||
for.end:
|
|
||||||
ret void
|
|
||||||
}
|
|
@ -12,7 +12,7 @@ target triple = "x86_64-apple-macosx10.9.0"
|
|||||||
;CHECK: for.body.preheader:
|
;CHECK: for.body.preheader:
|
||||||
;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
|
;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
|
||||||
;CHECK: vector.memcheck:
|
;CHECK: vector.memcheck:
|
||||||
;CHECK: br i1 %memcheck.conflict, label %middle.block, label %vector.ph
|
;CHECK: br i1 %found.conflict, label %middle.block, label %vector.ph
|
||||||
;CHECK: load <4 x float>
|
;CHECK: load <4 x float>
|
||||||
define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
|
define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
|
||||||
entry:
|
entry:
|
||||||
|
Loading…
Reference in New Issue
Block a user