mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 21:32:10 +00:00
[LoopAccesses] Allow analysis to complete in the presence of uniform stores
(Re-apply r234361 with a fix and a testcase for PR23157) Both run-time pointer checking and the dependence analysis are capable of dealing with uniform addresses. I.e. it's really just an orthogonal property of the loop that the analysis computes. Run-time pointer checking will only try to reason about SCEVAddRec pointers and otherwise gives up. If the uniform pointer turns out to be a SCEVAddRec in an outer loop, the run-time checks generated will be correct (start and end bounds would be equal). In the case of the dependence analysis, we again work with SCEVs. When compared against a loop-dependent address of the same underlying object, the difference of the two SCEVs won't be constant. This will result in returning an Unknown dependence for the pair. When compared against another uniform access, the difference would be constant and we should return the right type of dependence (forward/backward/etc). The change also adds support for querying this property of the loop and modifies the vectorizer to use it. Patch by Ashutosh Nema! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234424 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7e2bc24e05
commit
cd13a3808a
@ -432,6 +432,13 @@ public:
|
||||
/// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
|
||||
unsigned NumSymbolicStrides;
|
||||
|
||||
/// \brief Checks for the existence of a store to an invariant address inside the loop.
|
||||
/// If the loop has any store to an invariant address, then it returns true,
|
||||
/// else returns false.
|
||||
bool hasStoreToLoopInvariantAddress() const {
|
||||
return StoreToLoopInvariantAddress;
|
||||
}
|
||||
|
||||
private:
|
||||
/// \brief Analyze the loop. Substitute symbolic strides using Strides.
|
||||
void analyzeLoop(const ValueToValueMap &Strides);
|
||||
@ -469,6 +476,10 @@ private:
|
||||
/// \brief Cache the result of analyzeLoop.
|
||||
bool CanVecMem;
|
||||
|
||||
/// \brief Indicator for storing to uniform addresses.
|
||||
/// If a loop has a write to a loop invariant address then it should be true.
|
||||
bool StoreToLoopInvariantAddress;
|
||||
|
||||
/// \brief The diagnostics report generated for the analysis. E.g. why we
|
||||
/// couldn't analyze the loop.
|
||||
Optional<LoopAccessReport> Report;
|
||||
|
@ -1044,16 +1044,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
|
||||
for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
|
||||
StoreInst *ST = cast<StoreInst>(*I);
|
||||
Value* Ptr = ST->getPointerOperand();
|
||||
|
||||
if (isUniform(Ptr)) {
|
||||
emitAnalysis(
|
||||
LoopAccessReport(ST)
|
||||
<< "write to a loop invariant address could not be vectorized");
|
||||
DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
|
||||
CanVecMem = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for store to loop invariant address.
|
||||
StoreToLoopInvariantAddress |= isUniform(Ptr);
|
||||
// If we did *not* see this pointer before, insert it to the read-write
|
||||
// list. At this phase it is only a 'write' list.
|
||||
if (Seen.insert(Ptr).second) {
|
||||
@ -1314,7 +1306,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
|
||||
const ValueToValueMap &Strides)
|
||||
: DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
|
||||
TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
|
||||
MaxSafeDepDistBytes(-1U), CanVecMem(false) {
|
||||
MaxSafeDepDistBytes(-1U), CanVecMem(false),
|
||||
StoreToLoopInvariantAddress(false) {
|
||||
if (canAnalyzeLoop())
|
||||
analyzeLoop(Strides);
|
||||
}
|
||||
@ -1327,6 +1320,10 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
|
||||
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
|
||||
}
|
||||
|
||||
OS.indent(Depth) << "Store to invariant address was "
|
||||
<< (StoreToLoopInvariantAddress ? "" : "not ")
|
||||
<< "found in loop.\n";
|
||||
|
||||
if (Report)
|
||||
OS.indent(Depth) << "Report: " << Report->str() << "\n";
|
||||
|
||||
|
@ -4009,6 +4009,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||
if (!LAI->canVectorizeMemory())
|
||||
return false;
|
||||
|
||||
if (LAI->hasStoreToLoopInvariantAddress()) {
|
||||
emitAnalysis(
|
||||
VectorizationReport()
|
||||
<< "write to a loop invariant address could not be vectorized");
|
||||
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (LAI->getNumRuntimePointerChecks() >
|
||||
VectorizerParams::RuntimeMemoryCheckThreshold) {
|
||||
emitAnalysis(VectorizationReport()
|
||||
|
@ -0,0 +1,53 @@
|
||||
; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
|
||||
|
||||
; Test to confirm LAA will find store to invariant address.
|
||||
; Inner loop has a store to invariant address.
|
||||
;
|
||||
; for(; i < itr; i++) {
|
||||
; for(; j < itr; j++) {
|
||||
; var1[i] = var2[j] + var1[i];
|
||||
; }
|
||||
; }
|
||||
|
||||
; CHECK: Store to invariant address was found in loop.
|
||||
; CHECK-NOT: Store to invariant address was not found in loop.
|
||||
|
||||
define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
|
||||
entry:
|
||||
%cmp20 = icmp eq i32 %itr, 0
|
||||
br i1 %cmp20, label %for.end10, label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %entry, %for.inc8
|
||||
%indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
|
||||
%j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
|
||||
%cmp218 = icmp ult i32 %j.022, %itr
|
||||
br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.cond1.preheader
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
|
||||
%0 = zext i32 %j.022 to i64
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
|
||||
%1 = load i32, i32* %arrayidx, align 4
|
||||
%2 = load i32, i32* %arrayidx5, align 4
|
||||
%add = add nsw i32 %2, %1
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %itr
|
||||
br i1 %exitcond, label %for.inc8, label %for.body3
|
||||
|
||||
for.inc8: ; preds = %for.body3, %for.cond1.preheader
|
||||
%j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
|
||||
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
||||
%lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
|
||||
%exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
|
||||
br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
|
||||
|
||||
for.end10: ; preds = %for.inc8, %entry
|
||||
ret i32 undef
|
||||
}
|
||||
|
@ -0,0 +1,54 @@
|
||||
; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
|
||||
|
||||
; Test to confirm LAA will not find store to invariant address.
|
||||
; Inner loop has no store to invariant address.
|
||||
;
|
||||
; for(; i < itr; i++) {
|
||||
; for(; j < itr; j++) {
|
||||
; var2[j] = var2[j] + var1[i];
|
||||
; }
|
||||
; }
|
||||
|
||||
; CHECK: Store to invariant address was not found in loop.
|
||||
; CHECK-NOT: Store to invariant address was found in loop.
|
||||
|
||||
|
||||
define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
|
||||
entry:
|
||||
%cmp20 = icmp eq i32 %itr, 0
|
||||
br i1 %cmp20, label %for.end10, label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %entry, %for.inc8
|
||||
%indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
|
||||
%j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
|
||||
%cmp218 = icmp ult i32 %j.022, %itr
|
||||
br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.cond1.preheader
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
|
||||
%0 = zext i32 %j.022 to i64
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
|
||||
%1 = load i32, i32* %arrayidx, align 4
|
||||
%2 = load i32, i32* %arrayidx5, align 4
|
||||
%add = add nsw i32 %2, %1
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %itr
|
||||
br i1 %exitcond, label %for.inc8, label %for.body3
|
||||
|
||||
for.inc8: ; preds = %for.body3, %for.cond1.preheader
|
||||
%j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
|
||||
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
||||
%lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
|
||||
%exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
|
||||
br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
|
||||
|
||||
for.end10: ; preds = %for.inc8, %entry
|
||||
ret i32 undef
|
||||
}
|
||||
|
@ -0,0 +1,53 @@
|
||||
; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
|
||||
|
||||
; Test to confirm LAA will find store to invariant address.
|
||||
; Inner loop has a store to invariant address.
|
||||
;
|
||||
; for(; i < itr; i++) {
|
||||
; for(; j < itr; j++) {
|
||||
; var1[j] = ++var2[i] + var1[j];
|
||||
; }
|
||||
; }
|
||||
|
||||
; CHECK: Store to invariant address was found in loop.
|
||||
|
||||
define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
|
||||
entry:
|
||||
%cmp20 = icmp sgt i32 %itr, 0
|
||||
br i1 %cmp20, label %for.cond1.preheader, label %for.end11
|
||||
|
||||
for.cond1.preheader: ; preds = %entry, %for.inc9
|
||||
%indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc9 ], [ 0, %entry ]
|
||||
%j.022 = phi i32 [ %j.1.lcssa, %for.inc9 ], [ 0, %entry ]
|
||||
%cmp218 = icmp slt i32 %j.022, %itr
|
||||
br i1 %cmp218, label %for.body3.lr.ph, label %for.inc9
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.cond1.preheader
|
||||
%arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv23
|
||||
%0 = sext i32 %j.022 to i64
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
|
||||
%1 = load i32, i32* %arrayidx, align 4
|
||||
%inc = add nsw i32 %1, 1
|
||||
store i32 %inc, i32* %arrayidx, align 4
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
|
||||
%2 = load i32, i32* %arrayidx5, align 4
|
||||
%add = add nsw i32 %inc, %2
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%indvars.iv.next = add nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %itr
|
||||
br i1 %exitcond, label %for.inc9, label %for.body3
|
||||
|
||||
for.inc9: ; preds = %for.body3, %for.cond1.preheader
|
||||
%j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
|
||||
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
||||
%lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
|
||||
%exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
|
||||
br i1 %exitcond26, label %for.end11, label %for.cond1.preheader
|
||||
|
||||
for.end11: ; preds = %for.inc9, %entry
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user