mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-29 07:39:27 +00:00
Revert r229622: "[LoopAccesses] Make VectorizerParams global" and others. r229622 brought cyclic dependencies between Analysis and Vector.
r229622: "[LoopAccesses] Make VectorizerParams global" r229623: "[LoopAccesses] Stash the report from the analysis rather than emitting it" r229624: "[LoopAccesses] Cache the result of canVectorizeMemory" r229626: "[LoopAccesses] Create the analysis pass" r229628: "[LoopAccesses] Change debug messages from LV to LAA" r229630: "[LoopAccesses] Add canAnalyzeLoop" r229631: "[LoopAccesses] Add missing const to APIs in VectorizationReport" r229632: "[LoopAccesses] Split out LoopAccessReport from VectorizerReport" r229633: "[LoopAccesses] Add -analyze support" r229634: "[LoopAccesses] Change LAA:getInfo to return a constant reference" r229638: "Analysis: fix buildbots" git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229650 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
87010b0917
commit
383d8c7fdd
@ -16,13 +16,11 @@
|
|||||||
#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
|
#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
|
||||||
|
|
||||||
#include "llvm/ADT/EquivalenceClasses.h"
|
#include "llvm/ADT/EquivalenceClasses.h"
|
||||||
#include "llvm/ADT/Optional.h"
|
|
||||||
#include "llvm/ADT/SetVector.h"
|
#include "llvm/ADT/SetVector.h"
|
||||||
#include "llvm/Analysis/AliasAnalysis.h"
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
#include "llvm/Analysis/AliasSetTracker.h"
|
#include "llvm/Analysis/AliasSetTracker.h"
|
||||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||||
#include "llvm/IR/ValueHandle.h"
|
#include "llvm/IR/ValueHandle.h"
|
||||||
#include "llvm/Pass.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
@ -36,52 +34,30 @@ class SCEV;
|
|||||||
|
|
||||||
/// Optimization analysis message produced during vectorization. Messages inform
|
/// Optimization analysis message produced during vectorization. Messages inform
|
||||||
/// the user why vectorization did not occur.
|
/// the user why vectorization did not occur.
|
||||||
class LoopAccessReport {
|
class VectorizationReport {
|
||||||
std::string Message;
|
std::string Message;
|
||||||
const Instruction *Instr;
|
Instruction *Instr;
|
||||||
|
|
||||||
protected:
|
|
||||||
LoopAccessReport(const Twine &Message, const Instruction *I)
|
|
||||||
: Message(Message.str()), Instr(I) {}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
LoopAccessReport(const Instruction *I = nullptr) : Instr(I) {}
|
VectorizationReport(Instruction *I = nullptr)
|
||||||
|
: Message("loop not vectorized: "), Instr(I) {}
|
||||||
|
|
||||||
template <typename A> LoopAccessReport &operator<<(const A &Value) {
|
template <typename A> VectorizationReport &operator<<(const A &Value) {
|
||||||
raw_string_ostream Out(Message);
|
raw_string_ostream Out(Message);
|
||||||
Out << Value;
|
Out << Value;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Instruction *getInstr() const { return Instr; }
|
Instruction *getInstr() { return Instr; }
|
||||||
|
|
||||||
std::string &str() { return Message; }
|
std::string &str() { return Message; }
|
||||||
const std::string &str() const { return Message; }
|
|
||||||
operator Twine() { return Message; }
|
operator Twine() { return Message; }
|
||||||
|
|
||||||
/// \brief Emit an analysis note for \p PassName with the debug location from
|
/// \brief Emit an analysis note with the debug location from the instruction
|
||||||
/// the instruction in \p Message if available. Otherwise use the location of
|
/// in \p Message if available. Otherwise use the location of \p TheLoop.
|
||||||
/// \p TheLoop.
|
static void emitAnalysis(VectorizationReport &Message,
|
||||||
static void emitAnalysis(const LoopAccessReport &Message,
|
|
||||||
const Function *TheFunction,
|
const Function *TheFunction,
|
||||||
const Loop *TheLoop,
|
const Loop *TheLoop);
|
||||||
const char *PassName);
|
|
||||||
};
|
|
||||||
|
|
||||||
/// \brief Collection of parameters shared between the Loop Vectorizer and the
|
|
||||||
/// Loop Access Analysis.
|
|
||||||
struct VectorizerParams {
|
|
||||||
/// \brief Maximum SIMD width.
|
|
||||||
static const unsigned MaxVectorWidth;
|
|
||||||
|
|
||||||
/// \brief VF as overridden by the user.
|
|
||||||
static unsigned VectorizationFactor;
|
|
||||||
/// \brief Interleave factor as overridden by the user.
|
|
||||||
static unsigned VectorizationInterleave;
|
|
||||||
|
|
||||||
/// \brief When performing memory disambiguation checks at runtime do not
|
|
||||||
/// make more than this number of comparisons.
|
|
||||||
static const unsigned RuntimeMemoryCheckThreshold;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// \brief Drive the analysis of memory accesses in the loop
|
/// \brief Drive the analysis of memory accesses in the loop
|
||||||
@ -100,6 +76,30 @@ struct VectorizerParams {
|
|||||||
/// RuntimePointerCheck class.
|
/// RuntimePointerCheck class.
|
||||||
class LoopAccessInfo {
|
class LoopAccessInfo {
|
||||||
public:
|
public:
|
||||||
|
/// \brief Collection of parameters used from the vectorizer.
|
||||||
|
struct VectorizerParams {
|
||||||
|
/// \brief Maximum simd width.
|
||||||
|
unsigned MaxVectorWidth;
|
||||||
|
|
||||||
|
/// \brief VF as overridden by the user.
|
||||||
|
unsigned VectorizationFactor;
|
||||||
|
/// \brief Interleave factor as overridden by the user.
|
||||||
|
unsigned VectorizationInterleave;
|
||||||
|
|
||||||
|
/// \brief When performing memory disambiguation checks at runtime do not
|
||||||
|
/// make more than this number of comparisons.
|
||||||
|
unsigned RuntimeMemoryCheckThreshold;
|
||||||
|
|
||||||
|
VectorizerParams(unsigned MaxVectorWidth,
|
||||||
|
unsigned VectorizationFactor,
|
||||||
|
unsigned VectorizationInterleave,
|
||||||
|
unsigned RuntimeMemoryCheckThreshold) :
|
||||||
|
MaxVectorWidth(MaxVectorWidth),
|
||||||
|
VectorizationFactor(VectorizationFactor),
|
||||||
|
VectorizationInterleave(VectorizationInterleave),
|
||||||
|
RuntimeMemoryCheckThreshold(RuntimeMemoryCheckThreshold) {}
|
||||||
|
};
|
||||||
|
|
||||||
/// This struct holds information about the memory runtime legality check that
|
/// This struct holds information about the memory runtime legality check that
|
||||||
/// a group of pointers do not overlap.
|
/// a group of pointers do not overlap.
|
||||||
struct RuntimePointerCheck {
|
struct RuntimePointerCheck {
|
||||||
@ -120,16 +120,10 @@ public:
|
|||||||
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
|
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
|
||||||
unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides);
|
unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides);
|
||||||
|
|
||||||
/// \brief No run-time memory checking is necessary.
|
|
||||||
bool empty() const { return Pointers.empty(); }
|
|
||||||
|
|
||||||
/// \brief Decide whether we need to issue a run-time check for pointer at
|
/// \brief Decide whether we need to issue a run-time check for pointer at
|
||||||
/// index \p I and \p J to prove their independence.
|
/// index \p I and \p J to prove their independence.
|
||||||
bool needsChecking(unsigned I, unsigned J) const;
|
bool needsChecking(unsigned I, unsigned J) const;
|
||||||
|
|
||||||
/// \brief Print the list of run-time memory checks necessary.
|
|
||||||
void print(raw_ostream &OS, unsigned Depth = 0) const;
|
|
||||||
|
|
||||||
/// This flag indicates if we need to add the runtime check.
|
/// This flag indicates if we need to add the runtime check.
|
||||||
bool Need;
|
bool Need;
|
||||||
/// Holds the pointers that we need to check.
|
/// Holds the pointers that we need to check.
|
||||||
@ -147,17 +141,19 @@ public:
|
|||||||
SmallVector<unsigned, 2> AliasSetId;
|
SmallVector<unsigned, 2> AliasSetId;
|
||||||
};
|
};
|
||||||
|
|
||||||
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
|
LoopAccessInfo(Function *F, Loop *L, ScalarEvolution *SE,
|
||||||
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
|
const DataLayout *DL, const TargetLibraryInfo *TLI,
|
||||||
DominatorTree *DT, ValueToValueMap &Strides);
|
AliasAnalysis *AA, DominatorTree *DT,
|
||||||
|
const VectorizerParams &VectParams) :
|
||||||
|
TheFunction(F), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT),
|
||||||
|
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U),
|
||||||
|
VectParams(VectParams) {}
|
||||||
|
|
||||||
/// Return true if we can analyze the memory accesses in the loop and there are
|
/// Return true if we can analyze the memory accesses in the loop and there are
|
||||||
/// no memory dependence cycles.
|
/// no memory dependence cycles. Replaces symbolic strides using Strides.
|
||||||
bool canVectorizeMemory() const { return CanVecMem; }
|
bool canVectorizeMemory(ValueToValueMap &Strides);
|
||||||
|
|
||||||
const RuntimePointerCheck *getRuntimePointerCheck() const {
|
RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
|
||||||
return &PtrRtCheck;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return true if the block BB needs to be predicated in order for the loop
|
/// Return true if the block BB needs to be predicated in order for the loop
|
||||||
/// to be vectorized.
|
/// to be vectorized.
|
||||||
@ -165,7 +161,7 @@ public:
|
|||||||
DominatorTree *DT);
|
DominatorTree *DT);
|
||||||
|
|
||||||
/// Returns true if the value V is uniform within the loop.
|
/// Returns true if the value V is uniform within the loop.
|
||||||
bool isUniform(Value *V) const;
|
bool isUniform(Value *V);
|
||||||
|
|
||||||
unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
|
unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
|
||||||
unsigned getNumStores() const { return NumStores; }
|
unsigned getNumStores() const { return NumStores; }
|
||||||
@ -176,34 +172,15 @@ public:
|
|||||||
/// Returns a pair of instructions where the first element is the first
|
/// Returns a pair of instructions where the first element is the first
|
||||||
/// instruction generated in possibly a sequence of instructions and the
|
/// instruction generated in possibly a sequence of instructions and the
|
||||||
/// second value is the final comparator value or NULL if no check is needed.
|
/// second value is the final comparator value or NULL if no check is needed.
|
||||||
std::pair<Instruction *, Instruction *>
|
std::pair<Instruction *, Instruction *> addRuntimeCheck(Instruction *Loc);
|
||||||
addRuntimeCheck(Instruction *Loc) const;
|
|
||||||
|
|
||||||
/// \brief The diagnostics report generated for the analysis. E.g. why we
|
|
||||||
/// couldn't analyze the loop.
|
|
||||||
const Optional<LoopAccessReport> &getReport() const { return Report; }
|
|
||||||
|
|
||||||
/// \brief Print the information about the memory accesses in the loop.
|
|
||||||
void print(raw_ostream &OS, unsigned Depth = 0) const;
|
|
||||||
|
|
||||||
/// \brief Used to ensure that if the analysis was run with speculating the
|
|
||||||
/// value of symbolic strides, the client queries it with the same assumption.
|
|
||||||
/// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
|
|
||||||
unsigned NumSymbolicStrides;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// \brief Analyze the loop. Substitute symbolic strides using Strides.
|
void emitAnalysis(VectorizationReport &Message);
|
||||||
void analyzeLoop(ValueToValueMap &Strides);
|
|
||||||
|
|
||||||
/// \brief Check if the structure of the loop allows it to be analyzed by this
|
|
||||||
/// pass.
|
|
||||||
bool canAnalyzeLoop();
|
|
||||||
|
|
||||||
void emitAnalysis(LoopAccessReport &Message);
|
|
||||||
|
|
||||||
/// We need to check that all of the pointers in this list are disjoint
|
/// We need to check that all of the pointers in this list are disjoint
|
||||||
/// at runtime.
|
/// at runtime.
|
||||||
RuntimePointerCheck PtrRtCheck;
|
RuntimePointerCheck PtrRtCheck;
|
||||||
|
Function *TheFunction;
|
||||||
Loop *TheLoop;
|
Loop *TheLoop;
|
||||||
ScalarEvolution *SE;
|
ScalarEvolution *SE;
|
||||||
const DataLayout *DL;
|
const DataLayout *DL;
|
||||||
@ -216,12 +193,8 @@ private:
|
|||||||
|
|
||||||
unsigned MaxSafeDepDistBytes;
|
unsigned MaxSafeDepDistBytes;
|
||||||
|
|
||||||
/// \brief Cache the result of analyzeLoop.
|
/// \brief Vectorizer parameters used by the analysis.
|
||||||
bool CanVecMem;
|
VectorizerParams VectParams;
|
||||||
|
|
||||||
/// \brief The diagnostics report generated for the analysis. E.g. why we
|
|
||||||
/// couldn't analyze the loop.
|
|
||||||
Optional<LoopAccessReport> Report;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Value *stripIntegerCast(Value *V);
|
Value *stripIntegerCast(Value *V);
|
||||||
@ -236,52 +209,6 @@ const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
|
|||||||
ValueToValueMap &PtrToStride,
|
ValueToValueMap &PtrToStride,
|
||||||
Value *Ptr, Value *OrigPtr = nullptr);
|
Value *Ptr, Value *OrigPtr = nullptr);
|
||||||
|
|
||||||
/// \brief This analysis provides dependence information for the memory accesses
|
|
||||||
/// of a loop.
|
|
||||||
///
|
|
||||||
/// It runs the analysis for a loop on demand. This can be initiated by
|
|
||||||
/// querying the loop access info via LAA::getInfo. getInfo returns a
|
|
||||||
/// LoopAccessInfo object. See this class for the specifics of what information
|
|
||||||
/// is provided.
|
|
||||||
class LoopAccessAnalysis : public FunctionPass {
|
|
||||||
public:
|
|
||||||
static char ID;
|
|
||||||
|
|
||||||
LoopAccessAnalysis() : FunctionPass(ID) {
|
|
||||||
initializeLoopAccessAnalysisPass(*PassRegistry::getPassRegistry());
|
|
||||||
}
|
|
||||||
|
|
||||||
bool runOnFunction(Function &F) override;
|
|
||||||
|
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
|
||||||
|
|
||||||
/// \brief Query the result of the loop access information for the loop \p L.
|
|
||||||
///
|
|
||||||
/// If the client speculates (and then issues run-time checks) for the values
|
|
||||||
/// of symbolic strides, \p Strides provides the mapping (see
|
|
||||||
/// replaceSymbolicStrideSCEV). If there is no cached result available run
|
|
||||||
/// the analysis.
|
|
||||||
const LoopAccessInfo &getInfo(Loop *L, ValueToValueMap &Strides);
|
|
||||||
|
|
||||||
void releaseMemory() override {
|
|
||||||
// Invalidate the cache when the pass is freed.
|
|
||||||
LoopAccessInfoMap.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Print the result of the analysis when invoked with -analyze.
|
|
||||||
void print(raw_ostream &OS, const Module *M = nullptr) const override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
/// \brief The cache.
|
|
||||||
DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
|
|
||||||
|
|
||||||
// The used analysis passes.
|
|
||||||
ScalarEvolution *SE;
|
|
||||||
const DataLayout *DL;
|
|
||||||
const TargetLibraryInfo *TLI;
|
|
||||||
AliasAnalysis *AA;
|
|
||||||
DominatorTree *DT;
|
|
||||||
};
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -281,7 +281,6 @@ void initializeVirtRegRewriterPass(PassRegistry&);
|
|||||||
void initializeInstSimplifierPass(PassRegistry&);
|
void initializeInstSimplifierPass(PassRegistry&);
|
||||||
void initializeUnpackMachineBundlesPass(PassRegistry&);
|
void initializeUnpackMachineBundlesPass(PassRegistry&);
|
||||||
void initializeFinalizeMachineBundlesPass(PassRegistry&);
|
void initializeFinalizeMachineBundlesPass(PassRegistry&);
|
||||||
void initializeLoopAccessAnalysisPass(PassRegistry&);
|
|
||||||
void initializeLoopVectorizePass(PassRegistry&);
|
void initializeLoopVectorizePass(PassRegistry&);
|
||||||
void initializeSLPVectorizerPass(PassRegistry&);
|
void initializeSLPVectorizerPass(PassRegistry&);
|
||||||
void initializeBBVectorizePass(PassRegistry&);
|
void initializeBBVectorizePass(PassRegistry&);
|
||||||
|
@ -23,16 +23,15 @@
|
|||||||
#include "llvm/Transforms/Utils/VectorUtils.h"
|
#include "llvm/Transforms/Utils/VectorUtils.h"
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
#define DEBUG_TYPE "loop-accesses"
|
#define DEBUG_TYPE "loop-vectorize"
|
||||||
|
|
||||||
void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
|
void VectorizationReport::emitAnalysis(VectorizationReport &Message,
|
||||||
const Function *TheFunction,
|
const Function *TheFunction,
|
||||||
const Loop *TheLoop,
|
const Loop *TheLoop) {
|
||||||
const char *PassName) {
|
|
||||||
DebugLoc DL = TheLoop->getStartLoc();
|
DebugLoc DL = TheLoop->getStartLoc();
|
||||||
if (const Instruction *I = Message.getInstr())
|
if (Instruction *I = Message.getInstr())
|
||||||
DL = I->getDebugLoc();
|
DL = I->getDebugLoc();
|
||||||
emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
|
emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
|
||||||
*TheFunction, DL, Message.str());
|
*TheFunction, DL, Message.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,7 +64,7 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
|
|||||||
|
|
||||||
const SCEV *ByOne =
|
const SCEV *ByOne =
|
||||||
SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
|
SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
|
||||||
DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
|
DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
|
||||||
<< "\n");
|
<< "\n");
|
||||||
return ByOne;
|
return ByOne;
|
||||||
}
|
}
|
||||||
@ -110,23 +109,6 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS,
|
|
||||||
unsigned Depth) const {
|
|
||||||
unsigned NumPointers = Pointers.size();
|
|
||||||
if (NumPointers == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
OS.indent(Depth) << "Run-time memory checks:\n";
|
|
||||||
unsigned N = 0;
|
|
||||||
for (unsigned I = 0; I < NumPointers; ++I)
|
|
||||||
for (unsigned J = I + 1; J < NumPointers; ++J)
|
|
||||||
if (needsChecking(I, J)) {
|
|
||||||
OS.indent(Depth) << N++ << ":\n";
|
|
||||||
OS.indent(Depth + 2) << *Pointers[I] << "\n";
|
|
||||||
OS.indent(Depth + 2) << *Pointers[J] << "\n";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
/// \brief Analyses memory accesses in a loop.
|
/// \brief Analyses memory accesses in a loop.
|
||||||
///
|
///
|
||||||
@ -282,7 +264,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
|
|||||||
|
|
||||||
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
|
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
|
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
|
||||||
} else {
|
} else {
|
||||||
CanDoRT = false;
|
CanDoRT = false;
|
||||||
}
|
}
|
||||||
@ -319,7 +301,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
|
|||||||
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
|
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
|
||||||
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
|
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
|
||||||
if (ASi != ASj) {
|
if (ASi != ASj) {
|
||||||
DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
|
DEBUG(dbgs() << "LV: Runtime check would require comparison between"
|
||||||
" different address spaces\n");
|
" different address spaces\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -334,9 +316,9 @@ void AccessAnalysis::processMemAccesses() {
|
|||||||
// process read-only pointers. This allows us to skip dependence tests for
|
// process read-only pointers. This allows us to skip dependence tests for
|
||||||
// read-only pointers.
|
// read-only pointers.
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
|
DEBUG(dbgs() << "LV: Processing memory accesses...\n");
|
||||||
DEBUG(dbgs() << " AST: "; AST.dump());
|
DEBUG(dbgs() << " AST: "; AST.dump());
|
||||||
DEBUG(dbgs() << "LAA: Accesses:\n");
|
DEBUG(dbgs() << "LV: Accesses:\n");
|
||||||
DEBUG({
|
DEBUG({
|
||||||
for (auto A : Accesses)
|
for (auto A : Accesses)
|
||||||
dbgs() << "\t" << *A.getPointer() << " (" <<
|
dbgs() << "\t" << *A.getPointer() << " (" <<
|
||||||
@ -472,9 +454,10 @@ public:
|
|||||||
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
|
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
|
||||||
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
|
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
|
||||||
|
|
||||||
MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
|
MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L,
|
||||||
|
const LoopAccessInfo::VectorizerParams &VectParams)
|
||||||
: SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
|
: SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
|
||||||
ShouldRetryWithRuntimeCheck(false) {}
|
ShouldRetryWithRuntimeCheck(false), VectParams(VectParams) {}
|
||||||
|
|
||||||
/// \brief Register the location (instructions are given increasing numbers)
|
/// \brief Register the location (instructions are given increasing numbers)
|
||||||
/// of a write access.
|
/// of a write access.
|
||||||
@ -529,6 +512,9 @@ private:
|
|||||||
/// vectorize this loop with runtime checks.
|
/// vectorize this loop with runtime checks.
|
||||||
bool ShouldRetryWithRuntimeCheck;
|
bool ShouldRetryWithRuntimeCheck;
|
||||||
|
|
||||||
|
/// \brief Vectorizer parameters used by the analysis.
|
||||||
|
LoopAccessInfo::VectorizerParams VectParams;
|
||||||
|
|
||||||
/// \brief Check whether there is a plausible dependence between the two
|
/// \brief Check whether there is a plausible dependence between the two
|
||||||
/// accesses.
|
/// accesses.
|
||||||
///
|
///
|
||||||
@ -567,8 +553,8 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
|
|||||||
// Make sure that the pointer does not point to aggregate types.
|
// Make sure that the pointer does not point to aggregate types.
|
||||||
const PointerType *PtrTy = cast<PointerType>(Ty);
|
const PointerType *PtrTy = cast<PointerType>(Ty);
|
||||||
if (PtrTy->getElementType()->isAggregateType()) {
|
if (PtrTy->getElementType()->isAggregateType()) {
|
||||||
DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
|
DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
|
||||||
<< *Ptr << "\n");
|
"\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -576,14 +562,14 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
|
|||||||
|
|
||||||
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
|
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
|
||||||
if (!AR) {
|
if (!AR) {
|
||||||
DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
|
DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
|
||||||
<< *Ptr << " SCEV: " << *PtrScev << "\n");
|
<< *Ptr << " SCEV: " << *PtrScev << "\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The access function must stride over the innermost loop.
|
// The access function must stride over the innermost loop.
|
||||||
if (Lp != AR->getLoop()) {
|
if (Lp != AR->getLoop()) {
|
||||||
DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
|
DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
|
||||||
*Ptr << " SCEV: " << *PtrScev << "\n");
|
*Ptr << " SCEV: " << *PtrScev << "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -598,7 +584,7 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
|
|||||||
bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
|
bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
|
||||||
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
|
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
|
||||||
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
|
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
|
||||||
DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
|
DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
|
||||||
<< *Ptr << " SCEV: " << *PtrScev << "\n");
|
<< *Ptr << " SCEV: " << *PtrScev << "\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -609,7 +595,7 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
|
|||||||
// Calculate the pointer stride and check if it is consecutive.
|
// Calculate the pointer stride and check if it is consecutive.
|
||||||
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
|
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
|
||||||
if (!C) {
|
if (!C) {
|
||||||
DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
|
DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
|
||||||
" SCEV: " << *PtrScev << "\n");
|
" SCEV: " << *PtrScev << "\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -652,8 +638,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
|
|||||||
// Store-load forwarding distance.
|
// Store-load forwarding distance.
|
||||||
const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
|
const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
|
||||||
// Maximum vector factor.
|
// Maximum vector factor.
|
||||||
unsigned MaxVFWithoutSLForwardIssues =
|
unsigned MaxVFWithoutSLForwardIssues = VectParams.MaxVectorWidth*TypeByteSize;
|
||||||
VectorizerParams::MaxVectorWidth * TypeByteSize;
|
|
||||||
if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
|
if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
|
||||||
MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
|
MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
|
||||||
|
|
||||||
@ -666,14 +651,13 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
|
if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
|
||||||
DEBUG(dbgs() << "LAA: Distance " << Distance <<
|
DEBUG(dbgs() << "LV: Distance " << Distance <<
|
||||||
" that could cause a store-load forwarding conflict\n");
|
" that could cause a store-load forwarding conflict\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
|
if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
|
||||||
MaxVFWithoutSLForwardIssues !=
|
MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth*TypeByteSize)
|
||||||
VectorizerParams::MaxVectorWidth * TypeByteSize)
|
|
||||||
MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
|
MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -720,9 +704,9 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
|
|
||||||
const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
|
const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
|
DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
|
||||||
<< "(Induction step: " << StrideAPtr << ")\n");
|
<< "(Induction step: " << StrideAPtr << ")\n");
|
||||||
DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
|
DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
|
||||||
<< *InstMap[BIdx] << ": " << *Dist << "\n");
|
<< *InstMap[BIdx] << ": " << *Dist << "\n");
|
||||||
|
|
||||||
// Need consecutive accesses. We don't want to vectorize
|
// Need consecutive accesses. We don't want to vectorize
|
||||||
@ -735,7 +719,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
|
|
||||||
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
|
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
|
||||||
if (!C) {
|
if (!C) {
|
||||||
DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
|
DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n");
|
||||||
ShouldRetryWithRuntimeCheck = true;
|
ShouldRetryWithRuntimeCheck = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -753,7 +737,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
ATy != BTy))
|
ATy != BTy))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
|
DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -762,7 +746,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
if (Val == 0) {
|
if (Val == 0) {
|
||||||
if (ATy == BTy)
|
if (ATy == BTy)
|
||||||
return false;
|
return false;
|
||||||
DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
|
DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -771,17 +755,17 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
// Positive distance bigger than max vectorization factor.
|
// Positive distance bigger than max vectorization factor.
|
||||||
if (ATy != BTy) {
|
if (ATy != BTy) {
|
||||||
DEBUG(dbgs() <<
|
DEBUG(dbgs() <<
|
||||||
"LAA: ReadWrite-Write positive dependency with different types\n");
|
"LV: ReadWrite-Write positive dependency with different types\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned Distance = (unsigned) Val.getZExtValue();
|
unsigned Distance = (unsigned) Val.getZExtValue();
|
||||||
|
|
||||||
// Bail out early if passed-in parameters make vectorization not feasible.
|
// Bail out early if passed-in parameters make vectorization not feasible.
|
||||||
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
|
unsigned ForcedFactor = (VectParams.VectorizationFactor ?
|
||||||
VectorizerParams::VectorizationFactor : 1);
|
VectParams.VectorizationFactor : 1);
|
||||||
unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
|
unsigned ForcedUnroll = (VectParams.VectorizationInterleave ?
|
||||||
VectorizerParams::VectorizationInterleave : 1);
|
VectParams.VectorizationInterleave : 1);
|
||||||
|
|
||||||
// The distance must be bigger than the size needed for a vectorized version
|
// The distance must be bigger than the size needed for a vectorized version
|
||||||
// of the operation and the size of the vectorized operation must not be
|
// of the operation and the size of the vectorized operation must not be
|
||||||
@ -789,7 +773,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
if (Distance < 2*TypeByteSize ||
|
if (Distance < 2*TypeByteSize ||
|
||||||
2*TypeByteSize > MaxSafeDepDistBytes ||
|
2*TypeByteSize > MaxSafeDepDistBytes ||
|
||||||
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
|
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
|
||||||
DEBUG(dbgs() << "LAA: Failure because of Positive distance "
|
DEBUG(dbgs() << "LV: Failure because of Positive distance "
|
||||||
<< Val.getSExtValue() << '\n');
|
<< Val.getSExtValue() << '\n');
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -802,7 +786,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
couldPreventStoreLoadForward(Distance, TypeByteSize))
|
couldPreventStoreLoadForward(Distance, TypeByteSize))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
|
DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
|
||||||
" with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
|
" with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -847,56 +831,7 @@ bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopAccessInfo::canAnalyzeLoop() {
|
bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
|
||||||
// We can only analyze innermost loops.
|
|
||||||
if (!TheLoop->empty()) {
|
|
||||||
emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We must have a single backedge.
|
|
||||||
if (TheLoop->getNumBackEdges() != 1) {
|
|
||||||
emitAnalysis(
|
|
||||||
LoopAccessReport() <<
|
|
||||||
"loop control flow is not understood by analyzer");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We must have a single exiting block.
|
|
||||||
if (!TheLoop->getExitingBlock()) {
|
|
||||||
emitAnalysis(
|
|
||||||
LoopAccessReport() <<
|
|
||||||
"loop control flow is not understood by analyzer");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We only handle bottom-tested loops, i.e. loop in which the condition is
|
|
||||||
// checked at the end of each iteration. With that we can assume that all
|
|
||||||
// instructions in the loop are executed the same number of times.
|
|
||||||
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
|
|
||||||
emitAnalysis(
|
|
||||||
LoopAccessReport() <<
|
|
||||||
"loop control flow is not understood by analyzer");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We need to have a loop header.
|
|
||||||
DEBUG(dbgs() << "LAA: Found a loop: " <<
|
|
||||||
TheLoop->getHeader()->getName() << '\n');
|
|
||||||
|
|
||||||
// ScalarEvolution needs to be able to find the exit count.
|
|
||||||
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
|
|
||||||
if (ExitCount == SE->getCouldNotCompute()) {
|
|
||||||
emitAnalysis(LoopAccessReport() <<
|
|
||||||
"could not determine number of loop iterations");
|
|
||||||
DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|
||||||
|
|
||||||
typedef SmallVector<Value*, 16> ValueVector;
|
typedef SmallVector<Value*, 16> ValueVector;
|
||||||
typedef SmallPtrSet<Value*, 16> ValueSet;
|
typedef SmallPtrSet<Value*, 16> ValueSet;
|
||||||
@ -913,7 +848,7 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
PtrRtCheck.Need = false;
|
PtrRtCheck.Need = false;
|
||||||
|
|
||||||
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
|
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
|
||||||
MemoryDepChecker DepChecker(SE, DL, TheLoop);
|
MemoryDepChecker DepChecker(SE, DL, TheLoop, VectParams);
|
||||||
|
|
||||||
// For each block.
|
// For each block.
|
||||||
for (Loop::block_iterator bb = TheLoop->block_begin(),
|
for (Loop::block_iterator bb = TheLoop->block_begin(),
|
||||||
@ -936,11 +871,10 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
|
|
||||||
LoadInst *Ld = dyn_cast<LoadInst>(it);
|
LoadInst *Ld = dyn_cast<LoadInst>(it);
|
||||||
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
|
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
|
||||||
emitAnalysis(LoopAccessReport(Ld)
|
emitAnalysis(VectorizationReport(Ld)
|
||||||
<< "read with atomic ordering or volatile read");
|
<< "read with atomic ordering or volatile read");
|
||||||
DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
|
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
|
||||||
CanVecMem = false;
|
return false;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
NumLoads++;
|
NumLoads++;
|
||||||
Loads.push_back(Ld);
|
Loads.push_back(Ld);
|
||||||
@ -952,17 +886,15 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
if (it->mayWriteToMemory()) {
|
if (it->mayWriteToMemory()) {
|
||||||
StoreInst *St = dyn_cast<StoreInst>(it);
|
StoreInst *St = dyn_cast<StoreInst>(it);
|
||||||
if (!St) {
|
if (!St) {
|
||||||
emitAnalysis(LoopAccessReport(it) <<
|
emitAnalysis(VectorizationReport(it) <<
|
||||||
"instruction cannot be vectorized");
|
"instruction cannot be vectorized");
|
||||||
CanVecMem = false;
|
return false;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if (!St->isSimple() && !IsAnnotatedParallel) {
|
if (!St->isSimple() && !IsAnnotatedParallel) {
|
||||||
emitAnalysis(LoopAccessReport(St)
|
emitAnalysis(VectorizationReport(St)
|
||||||
<< "write with atomic ordering or volatile write");
|
<< "write with atomic ordering or volatile write");
|
||||||
DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
|
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
|
||||||
CanVecMem = false;
|
return false;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
NumStores++;
|
NumStores++;
|
||||||
Stores.push_back(St);
|
Stores.push_back(St);
|
||||||
@ -977,9 +909,8 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
// Check if we see any stores. If there are no stores, then we don't
|
// Check if we see any stores. If there are no stores, then we don't
|
||||||
// care if the pointers are *restrict*.
|
// care if the pointers are *restrict*.
|
||||||
if (!Stores.size()) {
|
if (!Stores.size()) {
|
||||||
DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
|
DEBUG(dbgs() << "LV: Found a read-only loop!\n");
|
||||||
CanVecMem = true;
|
return true;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AccessAnalysis::DepCandidates DependentAccesses;
|
AccessAnalysis::DepCandidates DependentAccesses;
|
||||||
@ -999,11 +930,10 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
|
|
||||||
if (isUniform(Ptr)) {
|
if (isUniform(Ptr)) {
|
||||||
emitAnalysis(
|
emitAnalysis(
|
||||||
LoopAccessReport(ST)
|
VectorizationReport(ST)
|
||||||
<< "write to a loop invariant address could not be vectorized");
|
<< "write to a loop invariant address could not be vectorized");
|
||||||
DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
|
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
|
||||||
CanVecMem = false;
|
return false;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we did *not* see this pointer before, insert it to the read-write
|
// If we did *not* see this pointer before, insert it to the read-write
|
||||||
@ -1024,10 +954,9 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
|
|
||||||
if (IsAnnotatedParallel) {
|
if (IsAnnotatedParallel) {
|
||||||
DEBUG(dbgs()
|
DEBUG(dbgs()
|
||||||
<< "LAA: A loop annotated parallel, ignore memory dependency "
|
<< "LV: A loop annotated parallel, ignore memory dependency "
|
||||||
<< "checks.\n");
|
<< "checks.\n");
|
||||||
CanVecMem = true;
|
return true;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
|
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
|
||||||
@ -1061,9 +990,8 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
// If we write (or read-write) to a single destination and there are no
|
// If we write (or read-write) to a single destination and there are no
|
||||||
// other reads in this loop then is it safe to vectorize.
|
// other reads in this loop then is it safe to vectorize.
|
||||||
if (NumReadWrites == 1 && NumReads == 0) {
|
if (NumReadWrites == 1 && NumReads == 0) {
|
||||||
DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
|
DEBUG(dbgs() << "LV: Found a write-only loop!\n");
|
||||||
CanVecMem = true;
|
return true;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build dependence sets and check whether we need a runtime pointer bounds
|
// Build dependence sets and check whether we need a runtime pointer bounds
|
||||||
@ -1079,7 +1007,7 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
|
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
|
||||||
Strides);
|
Strides);
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
|
DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
|
||||||
" pointer comparisons.\n");
|
" pointer comparisons.\n");
|
||||||
|
|
||||||
// If we only have one set of dependences to check pointers among we don't
|
// If we only have one set of dependences to check pointers among we don't
|
||||||
@ -1089,36 +1017,34 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
|
|
||||||
// Check that we did not collect too many pointers or found an unsizeable
|
// Check that we did not collect too many pointers or found an unsizeable
|
||||||
// pointer.
|
// pointer.
|
||||||
if (!CanDoRT ||
|
if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) {
|
||||||
NumComparisons > VectorizerParams::RuntimeMemoryCheckThreshold) {
|
|
||||||
PtrRtCheck.reset();
|
PtrRtCheck.reset();
|
||||||
CanDoRT = false;
|
CanDoRT = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CanDoRT) {
|
if (CanDoRT) {
|
||||||
DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
|
DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NeedRTCheck && !CanDoRT) {
|
if (NeedRTCheck && !CanDoRT) {
|
||||||
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
|
emitAnalysis(VectorizationReport() << "cannot identify array bounds");
|
||||||
DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
|
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
|
||||||
"the array bounds.\n");
|
"the array bounds.\n");
|
||||||
PtrRtCheck.reset();
|
PtrRtCheck.reset();
|
||||||
CanVecMem = false;
|
return false;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PtrRtCheck.Need = NeedRTCheck;
|
PtrRtCheck.Need = NeedRTCheck;
|
||||||
|
|
||||||
CanVecMem = true;
|
bool CanVecMem = true;
|
||||||
if (Accesses.isDependencyCheckNeeded()) {
|
if (Accesses.isDependencyCheckNeeded()) {
|
||||||
DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
|
DEBUG(dbgs() << "LV: Checking memory dependencies\n");
|
||||||
CanVecMem = DepChecker.areDepsSafe(
|
CanVecMem = DepChecker.areDepsSafe(
|
||||||
DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
|
DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
|
||||||
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
|
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
|
||||||
|
|
||||||
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
|
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
|
||||||
DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
|
DEBUG(dbgs() << "LV: Retrying with memory checks\n");
|
||||||
NeedRTCheck = true;
|
NeedRTCheck = true;
|
||||||
|
|
||||||
// Clear the dependency checks. We assume they are not needed.
|
// Clear the dependency checks. We assume they are not needed.
|
||||||
@ -1131,20 +1057,18 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
TheLoop, Strides, true);
|
TheLoop, Strides, true);
|
||||||
// Check that we did not collect too many pointers or found an unsizeable
|
// Check that we did not collect too many pointers or found an unsizeable
|
||||||
// pointer.
|
// pointer.
|
||||||
if (!CanDoRT ||
|
if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) {
|
||||||
NumComparisons > VectorizerParams::RuntimeMemoryCheckThreshold) {
|
|
||||||
if (!CanDoRT && NumComparisons > 0)
|
if (!CanDoRT && NumComparisons > 0)
|
||||||
emitAnalysis(LoopAccessReport()
|
emitAnalysis(VectorizationReport()
|
||||||
<< "cannot check memory dependencies at runtime");
|
<< "cannot check memory dependencies at runtime");
|
||||||
else
|
else
|
||||||
emitAnalysis(LoopAccessReport()
|
emitAnalysis(VectorizationReport()
|
||||||
<< NumComparisons << " exceeds limit of "
|
<< NumComparisons << " exceeds limit of "
|
||||||
<< VectorizerParams::RuntimeMemoryCheckThreshold
|
<< VectParams.RuntimeMemoryCheckThreshold
|
||||||
<< " dependent memory operations checked at runtime");
|
<< " dependent memory operations checked at runtime");
|
||||||
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
|
DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
|
||||||
PtrRtCheck.reset();
|
PtrRtCheck.reset();
|
||||||
CanVecMem = false;
|
return false;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CanVecMem = true;
|
CanVecMem = true;
|
||||||
@ -1152,11 +1076,13 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!CanVecMem)
|
if (!CanVecMem)
|
||||||
emitAnalysis(LoopAccessReport() <<
|
emitAnalysis(VectorizationReport() <<
|
||||||
"unsafe dependent memory operations in loop");
|
"unsafe dependent memory operations in loop");
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: We" << (NeedRTCheck ? "" : " don't") <<
|
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
|
||||||
" need a runtime memory check.\n");
|
" need a runtime memory check.\n");
|
||||||
|
|
||||||
|
return CanVecMem;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
|
bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
|
||||||
@ -1168,12 +1094,11 @@ bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
|
|||||||
return !DT->dominates(BB, Latch);
|
return !DT->dominates(BB, Latch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
|
void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) {
|
||||||
assert(!Report && "Multiple report generated");
|
VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
|
||||||
Report = Message;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopAccessInfo::isUniform(Value *V) const {
|
bool LoopAccessInfo::isUniform(Value *V) {
|
||||||
return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
|
return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1189,7 +1114,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::pair<Instruction *, Instruction *>
|
std::pair<Instruction *, Instruction *>
|
||||||
LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
|
LoopAccessInfo::addRuntimeCheck(Instruction *Loc) {
|
||||||
Instruction *tnullptr = nullptr;
|
Instruction *tnullptr = nullptr;
|
||||||
if (!PtrRtCheck.Need)
|
if (!PtrRtCheck.Need)
|
||||||
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
|
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
|
||||||
@ -1207,12 +1132,12 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
|
|||||||
const SCEV *Sc = SE->getSCEV(Ptr);
|
const SCEV *Sc = SE->getSCEV(Ptr);
|
||||||
|
|
||||||
if (SE->isLoopInvariant(Sc, TheLoop)) {
|
if (SE->isLoopInvariant(Sc, TheLoop)) {
|
||||||
DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
|
DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
|
||||||
*Ptr <<"\n");
|
*Ptr <<"\n");
|
||||||
Starts.push_back(Ptr);
|
Starts.push_back(Ptr);
|
||||||
Ends.push_back(Ptr);
|
Ends.push_back(Ptr);
|
||||||
} else {
|
} else {
|
||||||
DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
|
DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
|
||||||
unsigned AS = Ptr->getType()->getPointerAddressSpace();
|
unsigned AS = Ptr->getType()->getPointerAddressSpace();
|
||||||
|
|
||||||
// Use this type for pointer arithmetic.
|
// Use this type for pointer arithmetic.
|
||||||
@ -1272,100 +1197,3 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
|
|||||||
FirstInst = getFirstInst(FirstInst, Check, Loc);
|
FirstInst = getFirstInst(FirstInst, Check, Loc);
|
||||||
return std::make_pair(FirstInst, Check);
|
return std::make_pair(FirstInst, Check);
|
||||||
}
|
}
|
||||||
|
|
||||||
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
|
|
||||||
const DataLayout *DL,
|
|
||||||
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
|
|
||||||
DominatorTree *DT, ValueToValueMap &Strides)
|
|
||||||
: TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0),
|
|
||||||
NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) {
|
|
||||||
if (canAnalyzeLoop())
|
|
||||||
analyzeLoop(Strides);
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
|
|
||||||
if (CanVecMem) {
|
|
||||||
if (PtrRtCheck.empty())
|
|
||||||
OS.indent(Depth) << "Memory dependences are safe\n";
|
|
||||||
else
|
|
||||||
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Report)
|
|
||||||
OS.indent(Depth) << "Report: " << Report->str() << "\n";
|
|
||||||
|
|
||||||
// FIXME: Print unsafe dependences
|
|
||||||
|
|
||||||
// List the pair of accesses need run-time checks to prove independence.
|
|
||||||
PtrRtCheck.print(OS, Depth);
|
|
||||||
OS << "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
const LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L,
|
|
||||||
ValueToValueMap &Strides) {
|
|
||||||
auto &LAI = LoopAccessInfoMap[L];
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
|
||||||
assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
|
|
||||||
"Symbolic strides changed for loop");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!LAI) {
|
|
||||||
LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides);
|
|
||||||
#ifndef NDEBUG
|
|
||||||
LAI->NumSymbolicStrides = Strides.size();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
return *LAI.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
|
|
||||||
LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
|
|
||||||
|
|
||||||
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
|
||||||
ValueToValueMap NoSymbolicStrides;
|
|
||||||
|
|
||||||
for (Loop *TopLevelLoop : *LI)
|
|
||||||
for (Loop *L : depth_first(TopLevelLoop)) {
|
|
||||||
OS.indent(2) << L->getHeader()->getName() << ":\n";
|
|
||||||
auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
|
|
||||||
LAI.print(OS, 4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool LoopAccessAnalysis::runOnFunction(Function &F) {
|
|
||||||
SE = &getAnalysis<ScalarEvolution>();
|
|
||||||
DL = F.getParent()->getDataLayout();
|
|
||||||
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
|
|
||||||
TLI = TLIP ? &TLIP->getTLI() : nullptr;
|
|
||||||
AA = &getAnalysis<AliasAnalysis>();
|
|
||||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
|
|
||||||
AU.addRequired<ScalarEvolution>();
|
|
||||||
AU.addRequired<AliasAnalysis>();
|
|
||||||
AU.addRequired<DominatorTreeWrapperPass>();
|
|
||||||
AU.addRequired<LoopInfoWrapperPass>();
|
|
||||||
|
|
||||||
AU.setPreservesAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
char LoopAccessAnalysis::ID = 0;
|
|
||||||
static const char laa_name[] = "Loop Access Analysis";
|
|
||||||
#define LAA_NAME "loop-accesses"
|
|
||||||
|
|
||||||
INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
|
|
||||||
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
|
|
||||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
|
||||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
||||||
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
|
||||||
INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
Pass *createLAAPass() {
|
|
||||||
return new LoopAccessAnalysis();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -46,7 +46,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
|||||||
initializeJumpThreadingPass(Registry);
|
initializeJumpThreadingPass(Registry);
|
||||||
initializeLICMPass(Registry);
|
initializeLICMPass(Registry);
|
||||||
initializeLoopDeletionPass(Registry);
|
initializeLoopDeletionPass(Registry);
|
||||||
initializeLoopAccessAnalysisPass(Registry);
|
|
||||||
initializeLoopInstSimplifyPass(Registry);
|
initializeLoopInstSimplifyPass(Registry);
|
||||||
initializeLoopRotatePass(Registry);
|
initializeLoopRotatePass(Registry);
|
||||||
initializeLoopStrengthReducePass(Registry);
|
initializeLoopStrengthReducePass(Registry);
|
||||||
|
@ -106,19 +106,14 @@ using namespace llvm::PatternMatch;
|
|||||||
STATISTIC(LoopsVectorized, "Number of loops vectorized");
|
STATISTIC(LoopsVectorized, "Number of loops vectorized");
|
||||||
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
|
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
|
||||||
|
|
||||||
static cl::opt<unsigned, true>
|
static cl::opt<unsigned>
|
||||||
VectorizationFactor("force-vector-width", cl::Hidden,
|
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
|
||||||
cl::desc("Sets the SIMD width. Zero is autoselect."),
|
cl::desc("Sets the SIMD width. Zero is autoselect."));
|
||||||
cl::location(VectorizerParams::VectorizationFactor));
|
|
||||||
unsigned VectorizerParams::VectorizationFactor = 0;
|
|
||||||
|
|
||||||
static cl::opt<unsigned, true>
|
static cl::opt<unsigned>
|
||||||
VectorizationInterleave("force-vector-interleave", cl::Hidden,
|
VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
|
||||||
cl::desc("Sets the vectorization interleave count. "
|
cl::desc("Sets the vectorization interleave count. "
|
||||||
"Zero is autoselect."),
|
"Zero is autoselect."));
|
||||||
cl::location(
|
|
||||||
VectorizerParams::VectorizationInterleave));
|
|
||||||
unsigned VectorizerParams::VectorizationInterleave = 0;
|
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
|
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
|
||||||
@ -152,10 +147,10 @@ static const unsigned TinyTripCountUnrollThreshold = 128;
|
|||||||
|
|
||||||
/// When performing memory disambiguation checks at runtime do not make more
|
/// When performing memory disambiguation checks at runtime do not make more
|
||||||
/// than this number of comparisons.
|
/// than this number of comparisons.
|
||||||
const unsigned VectorizerParams::RuntimeMemoryCheckThreshold = 8;
|
static const unsigned RuntimeMemoryCheckThreshold = 8;
|
||||||
|
|
||||||
/// Maximum simd width.
|
/// Maximum simd width.
|
||||||
const unsigned VectorizerParams::MaxVectorWidth = 64;
|
static const unsigned MaxVectorWidth = 64;
|
||||||
|
|
||||||
static cl::opt<unsigned> ForceTargetNumScalarRegs(
|
static cl::opt<unsigned> ForceTargetNumScalarRegs(
|
||||||
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
|
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
|
||||||
@ -224,21 +219,6 @@ class LoopVectorizationLegality;
|
|||||||
class LoopVectorizationCostModel;
|
class LoopVectorizationCostModel;
|
||||||
class LoopVectorizeHints;
|
class LoopVectorizeHints;
|
||||||
|
|
||||||
/// \brief This modifies LoopAccessReport to initialize message with
|
|
||||||
/// loop-vectorizer-specific part.
|
|
||||||
class VectorizationReport : public LoopAccessReport {
|
|
||||||
public:
|
|
||||||
VectorizationReport(Instruction *I = nullptr)
|
|
||||||
: LoopAccessReport("loop not vectorized: ", I) {}
|
|
||||||
|
|
||||||
/// \brief This allows promotion of the loop-access analysis report into the
|
|
||||||
/// loop-vectorizer report. It modifies the message to add the
|
|
||||||
/// loop-vectorizer-specific part of the message.
|
|
||||||
explicit VectorizationReport(const LoopAccessReport &R)
|
|
||||||
: LoopAccessReport(Twine("loop not vectorized: ") + R.str(),
|
|
||||||
R.getInstr()) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// InnerLoopVectorizer vectorizes loops which contain only one basic
|
/// InnerLoopVectorizer vectorizes loops which contain only one basic
|
||||||
/// block to a specified vectorization factor (VF).
|
/// block to a specified vectorization factor (VF).
|
||||||
/// This class performs the widening of scalars into vectors, or multiple
|
/// This class performs the widening of scalars into vectors, or multiple
|
||||||
@ -567,11 +547,15 @@ public:
|
|||||||
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
|
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
|
||||||
DominatorTree *DT, TargetLibraryInfo *TLI,
|
DominatorTree *DT, TargetLibraryInfo *TLI,
|
||||||
AliasAnalysis *AA, Function *F,
|
AliasAnalysis *AA, Function *F,
|
||||||
const TargetTransformInfo *TTI,
|
const TargetTransformInfo *TTI)
|
||||||
LoopAccessAnalysis *LAA)
|
|
||||||
: NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
|
: NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
|
||||||
TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
|
TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), Induction(nullptr),
|
||||||
Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
|
WidestIndTy(nullptr),
|
||||||
|
LAI(F, L, SE, DL, TLI, AA, DT,
|
||||||
|
LoopAccessInfo::VectorizerParams(
|
||||||
|
MaxVectorWidth, VectorizationFactor, VectorizationInterleave,
|
||||||
|
RuntimeMemoryCheckThreshold)),
|
||||||
|
HasFunNoNaNAttr(false) {}
|
||||||
|
|
||||||
/// This enum represents the kinds of reductions that we support.
|
/// This enum represents the kinds of reductions that we support.
|
||||||
enum ReductionKind {
|
enum ReductionKind {
|
||||||
@ -756,19 +740,19 @@ public:
|
|||||||
bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
|
bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
|
||||||
|
|
||||||
/// Returns the information that we collected about runtime memory check.
|
/// Returns the information that we collected about runtime memory check.
|
||||||
const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const {
|
LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() {
|
||||||
return LAI->getRuntimePointerCheck();
|
return LAI.getRuntimePointerCheck();
|
||||||
}
|
}
|
||||||
|
|
||||||
const LoopAccessInfo *getLAI() const {
|
LoopAccessInfo *getLAI() {
|
||||||
return LAI;
|
return &LAI;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This function returns the identity element (or neutral element) for
|
/// This function returns the identity element (or neutral element) for
|
||||||
/// the operation K.
|
/// the operation K.
|
||||||
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
|
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
|
||||||
|
|
||||||
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
|
unsigned getMaxSafeDepDistBytes() { return LAI.getMaxSafeDepDistBytes(); }
|
||||||
|
|
||||||
bool hasStride(Value *V) { return StrideSet.count(V); }
|
bool hasStride(Value *V) { return StrideSet.count(V); }
|
||||||
bool mustCheckStrides() { return !StrideSet.empty(); }
|
bool mustCheckStrides() { return !StrideSet.empty(); }
|
||||||
@ -793,10 +777,10 @@ public:
|
|||||||
return (MaskedOp.count(I) != 0);
|
return (MaskedOp.count(I) != 0);
|
||||||
}
|
}
|
||||||
unsigned getNumStores() const {
|
unsigned getNumStores() const {
|
||||||
return LAI->getNumStores();
|
return LAI.getNumStores();
|
||||||
}
|
}
|
||||||
unsigned getNumLoads() const {
|
unsigned getNumLoads() const {
|
||||||
return LAI->getNumLoads();
|
return LAI.getNumLoads();
|
||||||
}
|
}
|
||||||
unsigned getNumPredStores() const {
|
unsigned getNumPredStores() const {
|
||||||
return NumPredStores;
|
return NumPredStores;
|
||||||
@ -850,11 +834,9 @@ private:
|
|||||||
void collectStridedAccess(Value *LoadOrStoreInst);
|
void collectStridedAccess(Value *LoadOrStoreInst);
|
||||||
|
|
||||||
/// Report an analysis message to assist the user in diagnosing loops that are
|
/// Report an analysis message to assist the user in diagnosing loops that are
|
||||||
/// not vectorized. These are handled as LoopAccessReport rather than
|
/// not vectorized.
|
||||||
/// VectorizationReport because the << operator of VectorizationReport returns
|
void emitAnalysis(VectorizationReport &Message) {
|
||||||
/// LoopAccessReport.
|
VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
|
||||||
void emitAnalysis(const LoopAccessReport &Message) {
|
|
||||||
LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned NumPredStores;
|
unsigned NumPredStores;
|
||||||
@ -873,11 +855,6 @@ private:
|
|||||||
const TargetTransformInfo *TTI;
|
const TargetTransformInfo *TTI;
|
||||||
/// Dominator Tree.
|
/// Dominator Tree.
|
||||||
DominatorTree *DT;
|
DominatorTree *DT;
|
||||||
// LoopAccess analysis.
|
|
||||||
LoopAccessAnalysis *LAA;
|
|
||||||
// And the loop-accesses info corresponding to this loop. This pointer is
|
|
||||||
// null until canVectorizeMemory sets it up.
|
|
||||||
const LoopAccessInfo *LAI;
|
|
||||||
|
|
||||||
// --- vectorization state --- //
|
// --- vectorization state --- //
|
||||||
|
|
||||||
@ -899,7 +876,7 @@ private:
|
|||||||
/// This set holds the variables which are known to be uniform after
|
/// This set holds the variables which are known to be uniform after
|
||||||
/// vectorization.
|
/// vectorization.
|
||||||
SmallPtrSet<Instruction*, 4> Uniforms;
|
SmallPtrSet<Instruction*, 4> Uniforms;
|
||||||
|
LoopAccessInfo LAI;
|
||||||
/// Can we assume the absence of NaNs.
|
/// Can we assume the absence of NaNs.
|
||||||
bool HasFunNoNaNAttr;
|
bool HasFunNoNaNAttr;
|
||||||
|
|
||||||
@ -989,11 +966,9 @@ private:
|
|||||||
bool isConsecutiveLoadOrStore(Instruction *I);
|
bool isConsecutiveLoadOrStore(Instruction *I);
|
||||||
|
|
||||||
/// Report an analysis message to assist the user in diagnosing loops that are
|
/// Report an analysis message to assist the user in diagnosing loops that are
|
||||||
/// not vectorized. These are handled as LoopAccessReport rather than
|
/// not vectorized.
|
||||||
/// VectorizationReport because the << operator of VectorizationReport returns
|
void emitAnalysis(VectorizationReport &Message) {
|
||||||
/// LoopAccessReport.
|
VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
|
||||||
void emitAnalysis(const LoopAccessReport &Message) {
|
|
||||||
LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Values used only by @llvm.assume calls.
|
/// Values used only by @llvm.assume calls.
|
||||||
@ -1046,7 +1021,7 @@ class LoopVectorizeHints {
|
|||||||
bool validate(unsigned Val) {
|
bool validate(unsigned Val) {
|
||||||
switch (Kind) {
|
switch (Kind) {
|
||||||
case HK_WIDTH:
|
case HK_WIDTH:
|
||||||
return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
|
return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
|
||||||
case HK_UNROLL:
|
case HK_UNROLL:
|
||||||
return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
|
return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
|
||||||
case HK_FORCE:
|
case HK_FORCE:
|
||||||
@ -1282,7 +1257,6 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
TargetLibraryInfo *TLI;
|
TargetLibraryInfo *TLI;
|
||||||
AliasAnalysis *AA;
|
AliasAnalysis *AA;
|
||||||
AssumptionCache *AC;
|
AssumptionCache *AC;
|
||||||
LoopAccessAnalysis *LAA;
|
|
||||||
bool DisableUnrolling;
|
bool DisableUnrolling;
|
||||||
bool AlwaysVectorize;
|
bool AlwaysVectorize;
|
||||||
|
|
||||||
@ -1300,7 +1274,6 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
TLI = TLIP ? &TLIP->getTLI() : nullptr;
|
TLI = TLIP ? &TLIP->getTLI() : nullptr;
|
||||||
AA = &getAnalysis<AliasAnalysis>();
|
AA = &getAnalysis<AliasAnalysis>();
|
||||||
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
||||||
LAA = &getAnalysis<LoopAccessAnalysis>();
|
|
||||||
|
|
||||||
// Compute some weights outside of the loop over the loops. Compute this
|
// Compute some weights outside of the loop over the loops. Compute this
|
||||||
// using a BranchProbability to re-use its scaling math.
|
// using a BranchProbability to re-use its scaling math.
|
||||||
@ -1411,7 +1384,7 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if it is legal to vectorize the loop.
|
// Check if it is legal to vectorize the loop.
|
||||||
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA);
|
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI);
|
||||||
if (!LVL.canVectorize()) {
|
if (!LVL.canVectorize()) {
|
||||||
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
|
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
|
||||||
emitMissedWarning(F, L, Hints);
|
emitMissedWarning(F, L, Hints);
|
||||||
@ -1516,7 +1489,6 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
AU.addRequired<ScalarEvolution>();
|
AU.addRequired<ScalarEvolution>();
|
||||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||||
AU.addRequired<AliasAnalysis>();
|
AU.addRequired<AliasAnalysis>();
|
||||||
AU.addRequired<LoopAccessAnalysis>();
|
|
||||||
AU.addPreserved<LoopInfoWrapperPass>();
|
AU.addPreserved<LoopInfoWrapperPass>();
|
||||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||||
AU.addPreserved<AliasAnalysis>();
|
AU.addPreserved<AliasAnalysis>();
|
||||||
@ -1688,7 +1660,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool LoopVectorizationLegality::isUniform(Value *V) {
|
bool LoopVectorizationLegality::isUniform(Value *V) {
|
||||||
return LAI->isUniform(V);
|
return LAI.isUniform(V);
|
||||||
}
|
}
|
||||||
|
|
||||||
InnerLoopVectorizer::VectorParts&
|
InnerLoopVectorizer::VectorParts&
|
||||||
@ -3428,7 +3400,7 @@ bool LoopVectorizationLegality::canVectorize() {
|
|||||||
collectLoopUniforms();
|
collectLoopUniforms();
|
||||||
|
|
||||||
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
|
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
|
||||||
(LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
|
(LAI.getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
|
||||||
"")
|
"")
|
||||||
<<"!\n");
|
<<"!\n");
|
||||||
|
|
||||||
@ -3853,11 +3825,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool LoopVectorizationLegality::canVectorizeMemory() {
|
bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||||
LAI = &LAA->getInfo(TheLoop, Strides);
|
return LAI.canVectorizeMemory(Strides);
|
||||||
auto &OptionalReport = LAI->getReport();
|
|
||||||
if (OptionalReport)
|
|
||||||
emitAnalysis(VectorizationReport(*OptionalReport));
|
|
||||||
return LAI->canVectorizeMemory();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool hasMultipleUsesOf(Instruction *I,
|
static bool hasMultipleUsesOf(Instruction *I,
|
||||||
@ -5032,7 +5000,6 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
|||||||
INITIALIZE_PASS_DEPENDENCY(LCSSA)
|
INITIALIZE_PASS_DEPENDENCY(LCSSA)
|
||||||
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
||||||
INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
|
|
||||||
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
|
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
@ -1,60 +0,0 @@
|
|||||||
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
|
|
||||||
|
|
||||||
; FIXME: This is the non-debug version of unsafe-and-rt-checks.ll not
|
|
||||||
; requiring "asserts". Once we can check memory dependences without -debug,
|
|
||||||
; we should remove this test.
|
|
||||||
|
|
||||||
; Analyze this loop:
|
|
||||||
; for (i = 0; i < n; i++)
|
|
||||||
; A[i + 1] = A[i] * B[i] * C[i];
|
|
||||||
|
|
||||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
|
||||||
target triple = "x86_64-apple-macosx10.10.0"
|
|
||||||
|
|
||||||
; CHECK: Report: unsafe dependent memory operations in loop
|
|
||||||
|
|
||||||
; CHECK: Run-time memory checks:
|
|
||||||
; CHECK-NEXT: 0:
|
|
||||||
; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add
|
|
||||||
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3
|
|
||||||
; CHECK-NEXT: 1:
|
|
||||||
; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add
|
|
||||||
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3
|
|
||||||
|
|
||||||
@n = global i32 20, align 4
|
|
||||||
@B = common global i16* null, align 8
|
|
||||||
@A = common global i16* null, align 8
|
|
||||||
@C = common global i16* null, align 8
|
|
||||||
|
|
||||||
define void @f() {
|
|
||||||
entry:
|
|
||||||
%a = load i16** @A, align 8
|
|
||||||
%b = load i16** @B, align 8
|
|
||||||
%c = load i16** @C, align 8
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body: ; preds = %for.body, %entry
|
|
||||||
%storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
|
||||||
|
|
||||||
%arrayidxA = getelementptr inbounds i16* %a, i64 %storemerge3
|
|
||||||
%loadA = load i16* %arrayidxA, align 2
|
|
||||||
|
|
||||||
%arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3
|
|
||||||
%loadB = load i16* %arrayidxB, align 2
|
|
||||||
|
|
||||||
%arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3
|
|
||||||
%loadC = load i16* %arrayidxC, align 2
|
|
||||||
|
|
||||||
%mul = mul i16 %loadB, %loadA
|
|
||||||
%mul1 = mul i16 %mul, %loadC
|
|
||||||
|
|
||||||
%add = add nuw nsw i64 %storemerge3, 1
|
|
||||||
%arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add
|
|
||||||
store i16 %mul1, i16* %arrayidxA_plus_2, align 2
|
|
||||||
|
|
||||||
%exitcond = icmp eq i64 %add, 20
|
|
||||||
br i1 %exitcond, label %for.end, label %for.body
|
|
||||||
|
|
||||||
for.end: ; preds = %for.body
|
|
||||||
ret void
|
|
||||||
}
|
|
@ -1,61 +0,0 @@
|
|||||||
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
|
|
||||||
; RUN: opt -loop-accesses -analyze -debug-only=loop-accesses < %s 2>&1 | FileCheck %s --check-prefix=DEBUG
|
|
||||||
; REQUIRES: asserts
|
|
||||||
|
|
||||||
; Analyze this loop:
|
|
||||||
; for (i = 0; i < n; i++)
|
|
||||||
; A[i + 1] = A[i] * B[i] * C[i];
|
|
||||||
|
|
||||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
|
||||||
target triple = "x86_64-apple-macosx10.10.0"
|
|
||||||
|
|
||||||
; CHECK: Report: unsafe dependent memory operations in loop
|
|
||||||
|
|
||||||
; DEBUG: LAA: Distance for %loadA = load i16* %arrayidxA, align 2 to store i16 %mul1, i16* %arrayidxA_plus_2, align 2: 2
|
|
||||||
; DEBUG-NEXT: LAA: Failure because of Positive distance 2
|
|
||||||
|
|
||||||
; CHECK: Run-time memory checks:
|
|
||||||
; CHECK-NEXT: 0:
|
|
||||||
; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add
|
|
||||||
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3
|
|
||||||
; CHECK-NEXT: 1:
|
|
||||||
; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add
|
|
||||||
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3
|
|
||||||
|
|
||||||
@n = global i32 20, align 4
|
|
||||||
@B = common global i16* null, align 8
|
|
||||||
@A = common global i16* null, align 8
|
|
||||||
@C = common global i16* null, align 8
|
|
||||||
|
|
||||||
define void @f() {
|
|
||||||
entry:
|
|
||||||
%a = load i16** @A, align 8
|
|
||||||
%b = load i16** @B, align 8
|
|
||||||
%c = load i16** @C, align 8
|
|
||||||
br label %for.body
|
|
||||||
|
|
||||||
for.body: ; preds = %for.body, %entry
|
|
||||||
%storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
|
||||||
|
|
||||||
%arrayidxA = getelementptr inbounds i16* %a, i64 %storemerge3
|
|
||||||
%loadA = load i16* %arrayidxA, align 2
|
|
||||||
|
|
||||||
%arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3
|
|
||||||
%loadB = load i16* %arrayidxB, align 2
|
|
||||||
|
|
||||||
%arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3
|
|
||||||
%loadC = load i16* %arrayidxC, align 2
|
|
||||||
|
|
||||||
%mul = mul i16 %loadB, %loadA
|
|
||||||
%mul1 = mul i16 %mul, %loadC
|
|
||||||
|
|
||||||
%add = add nuw nsw i64 %storemerge3, 1
|
|
||||||
%arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add
|
|
||||||
store i16 %mul1, i16* %arrayidxA_plus_2, align 2
|
|
||||||
|
|
||||||
%exitcond = icmp eq i64 %add, 20
|
|
||||||
br i1 %exitcond, label %for.end, label %for.body
|
|
||||||
|
|
||||||
for.end: ; preds = %for.body
|
|
||||||
ret void
|
|
||||||
}
|
|
Loading…
x
Reference in New Issue
Block a user