mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-07 12:28:24 +00:00
LoopVectorizer: If dependency checks fail try runtime checks
When a dependence check fails, we can still try to vectorize loops with runtime array bounds checks. This helps linpack vectorize a loop in dgefa, and we are back to 2x the scalar performance on a corei7-avx. radar://15339680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193853 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -3061,7 +3061,7 @@ public:
|
|||||||
/// non-intersection.
|
/// non-intersection.
|
||||||
bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
|
bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
|
||||||
unsigned &NumComparisons, ScalarEvolution *SE,
|
unsigned &NumComparisons, ScalarEvolution *SE,
|
||||||
Loop *TheLoop);
|
Loop *TheLoop, bool ShouldCheckStride = false);
|
||||||
|
|
||||||
/// \brief Goes over all memory accesses, checks whether a RT check is needed
|
/// \brief Goes over all memory accesses, checks whether a RT check is needed
|
||||||
/// and builds sets of dependent accesses.
|
/// and builds sets of dependent accesses.
|
||||||
@@ -3075,6 +3075,7 @@ public:
|
|||||||
bool isRTCheckNeeded() { return IsRTCheckNeeded; }
|
bool isRTCheckNeeded() { return IsRTCheckNeeded; }
|
||||||
|
|
||||||
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
|
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
|
||||||
|
void resetDepChecks() { CheckDeps.clear(); }
|
||||||
|
|
||||||
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
|
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
|
||||||
|
|
||||||
@@ -3129,10 +3130,15 @@ static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
|
|||||||
return AR->isAffine();
|
return AR->isAffine();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \brief Check the stride of the pointer and ensure that it does not wrap in
|
||||||
|
/// the address space.
|
||||||
|
static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
|
||||||
|
const Loop *Lp);
|
||||||
|
|
||||||
bool AccessAnalysis::canCheckPtrAtRT(
|
bool AccessAnalysis::canCheckPtrAtRT(
|
||||||
LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
|
LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
|
||||||
unsigned &NumComparisons, ScalarEvolution *SE,
|
unsigned &NumComparisons, ScalarEvolution *SE,
|
||||||
Loop *TheLoop) {
|
Loop *TheLoop, bool ShouldCheckStride) {
|
||||||
// Find pointers with computable bounds. We are going to use this information
|
// Find pointers with computable bounds. We are going to use this information
|
||||||
// to place a runtime bound check.
|
// to place a runtime bound check.
|
||||||
unsigned NumReadPtrChecks = 0;
|
unsigned NumReadPtrChecks = 0;
|
||||||
@@ -3160,7 +3166,10 @@ bool AccessAnalysis::canCheckPtrAtRT(
|
|||||||
else
|
else
|
||||||
++NumReadPtrChecks;
|
++NumReadPtrChecks;
|
||||||
|
|
||||||
if (hasComputableBounds(SE, Ptr)) {
|
if (hasComputableBounds(SE, Ptr) &&
|
||||||
|
// When we run after a failing dependency check we have to make sure we
|
||||||
|
// don't have wrapping pointers.
|
||||||
|
(!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
|
||||||
// The id of the dependence set.
|
// The id of the dependence set.
|
||||||
unsigned DepId;
|
unsigned DepId;
|
||||||
|
|
||||||
@@ -3342,8 +3351,9 @@ public:
|
|||||||
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
|
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
|
||||||
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
|
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
|
||||||
|
|
||||||
MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L) :
|
MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
|
||||||
SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}
|
: SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
|
||||||
|
ShouldRetryWithRuntimeCheck(false) {}
|
||||||
|
|
||||||
/// \brief Register the location (instructions are given increasing numbers)
|
/// \brief Register the location (instructions are given increasing numbers)
|
||||||
/// of a write access.
|
/// of a write access.
|
||||||
@@ -3373,6 +3383,10 @@ public:
|
|||||||
/// the accesses safely with.
|
/// the accesses safely with.
|
||||||
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
|
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
|
||||||
|
|
||||||
|
/// \brief In some cases when the dependency check fails we can still
|
||||||
|
/// vectorize the loop with a dynamic array access check.
|
||||||
|
bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ScalarEvolution *SE;
|
ScalarEvolution *SE;
|
||||||
DataLayout *DL;
|
DataLayout *DL;
|
||||||
@@ -3390,6 +3404,10 @@ private:
|
|||||||
// We can access this many bytes in parallel safely.
|
// We can access this many bytes in parallel safely.
|
||||||
unsigned MaxSafeDepDistBytes;
|
unsigned MaxSafeDepDistBytes;
|
||||||
|
|
||||||
|
/// \brief If we see a non-constant dependence distance we can still try to
|
||||||
|
/// vectorize this loop with runtime checks.
|
||||||
|
bool ShouldRetryWithRuntimeCheck;
|
||||||
|
|
||||||
/// \brief Check whether there is a plausible dependence between the two
|
/// \brief Check whether there is a plausible dependence between the two
|
||||||
/// accesses.
|
/// accesses.
|
||||||
///
|
///
|
||||||
@@ -3587,6 +3605,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||||||
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
|
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
|
||||||
if (!C) {
|
if (!C) {
|
||||||
DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
|
DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
|
||||||
|
ShouldRetryWithRuntimeCheck = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3876,6 +3895,29 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
|||||||
CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
|
CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
|
||||||
Accesses.getDependenciesToCheck());
|
Accesses.getDependenciesToCheck());
|
||||||
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
|
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
|
||||||
|
|
||||||
|
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
|
||||||
|
DEBUG(dbgs() << "LV: Retrying with memory checks\n");
|
||||||
|
NeedRTCheck = true;
|
||||||
|
|
||||||
|
// Clear the dependency checks. We assume they are not needed.
|
||||||
|
Accesses.resetDepChecks();
|
||||||
|
|
||||||
|
PtrRtCheck.reset();
|
||||||
|
PtrRtCheck.Need = true;
|
||||||
|
|
||||||
|
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
|
||||||
|
TheLoop, true);
|
||||||
|
// Check that we did not collect too many pointers or find an unsizeable
|
||||||
|
// pointer.
|
||||||
|
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
|
||||||
|
DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
|
||||||
|
PtrRtCheck.reset();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
CanVecMem = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
|
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
|
||||||
|
@@ -34,3 +34,31 @@ for.body: ; preds = %entry, %for.body
|
|||||||
for.end: ; preds = %for.body, %entry
|
for.end: ; preds = %for.body, %entry
|
||||||
ret i32 undef
|
ret i32 undef
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Make sure that we try to vectorize loops with a runtime check if the
|
||||||
|
; dependency check fails.
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_runtime_check
|
||||||
|
; CHECK: <4 x float>
|
||||||
|
define void @test_runtime_check(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||||
|
%ind.sum = add i64 %iv, %offset
|
||||||
|
%arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
|
||||||
|
%l1 = load float* %arr.idx, align 4
|
||||||
|
%ind.sum2 = add i64 %iv, %offset2
|
||||||
|
%arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
|
||||||
|
%l2 = load float* %arr.idx2, align 4
|
||||||
|
%m = fmul fast float %b, %l2
|
||||||
|
%ad = fadd fast float %l1, %m
|
||||||
|
store float %ad, float* %arr.idx, align 4
|
||||||
|
%iv.next = add nuw nsw i64 %iv, 1
|
||||||
|
%exitcond = icmp eq i64 %iv.next, %n
|
||||||
|
br i1 %exitcond, label %loopexit, label %for.body
|
||||||
|
|
||||||
|
loopexit:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user