mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-30 05:24:22 +00:00
[LoopUnrollRuntime] Avoid high-cost trip count computation.
Summary: Runtime unrolling of loops needs to emit an expression to compute the loop's runtime trip-count. Avoid runtime unrolling if this computation will be expensive. Depends on D8993. Reviewers: atrick Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8994 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234846 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -261,6 +261,9 @@ public:
|
|||||||
/// loop body even when the number of loop iterations is not known at
|
/// loop body even when the number of loop iterations is not known at
|
||||||
/// compile time).
|
/// compile time).
|
||||||
bool Runtime;
|
bool Runtime;
|
||||||
|
/// Allow emitting expensive instructions (such as divisions) when computing
|
||||||
|
/// the trip count of a loop for runtime unrolling.
|
||||||
|
bool AllowExpensiveTripCount;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// \brief Get target-customized preferences for the generic loop unrolling
|
/// \brief Get target-customized preferences for the generic loop unrolling
|
||||||
|
@ -28,11 +28,13 @@ class MDNode;
|
|||||||
class Pass;
|
class Pass;
|
||||||
|
|
||||||
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
|
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
|
||||||
unsigned TripMultiple, LoopInfo *LI, Pass *PP,
|
bool AllowExpensiveTripCount, unsigned TripMultiple,
|
||||||
LPPassManager *LPM, AssumptionCache *AC);
|
LoopInfo *LI, Pass *PP, LPPassManager *LPM,
|
||||||
|
AssumptionCache *AC);
|
||||||
|
|
||||||
bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
||||||
LPPassManager* LPM);
|
bool AllowExpensiveTripCount, LoopInfo *LI,
|
||||||
|
LPPassManager *LPM);
|
||||||
|
|
||||||
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
|
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
|
||||||
}
|
}
|
||||||
|
@ -165,6 +165,7 @@ namespace {
|
|||||||
UP.MaxCount = UINT_MAX;
|
UP.MaxCount = UINT_MAX;
|
||||||
UP.Partial = CurrentAllowPartial;
|
UP.Partial = CurrentAllowPartial;
|
||||||
UP.Runtime = CurrentRuntime;
|
UP.Runtime = CurrentRuntime;
|
||||||
|
UP.AllowExpensiveTripCount = false;
|
||||||
TTI.getUnrollingPreferences(L, UP);
|
TTI.getUnrollingPreferences(L, UP);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -886,8 +887,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Unroll the loop.
|
// Unroll the loop.
|
||||||
if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this,
|
if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
|
||||||
&LPM, &AC))
|
TripMultiple, LI, this, &LPM, &AC))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
|
|||||||
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
|
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
|
||||||
/// execute without exiting the loop.
|
/// execute without exiting the loop.
|
||||||
///
|
///
|
||||||
|
/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
|
||||||
|
/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
|
||||||
|
/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
|
||||||
|
/// iterations before branching into the unrolled loop. UnrollLoop will not
|
||||||
|
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
|
||||||
|
/// AllowExpensiveTripCount is false.
|
||||||
|
///
|
||||||
/// The LoopInfo Analysis that is passed will be kept consistent.
|
/// The LoopInfo Analysis that is passed will be kept consistent.
|
||||||
///
|
///
|
||||||
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
|
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
|
||||||
@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
|
|||||||
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
|
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
|
||||||
/// available from the Pass it must also preserve those analyses.
|
/// available from the Pass it must also preserve those analyses.
|
||||||
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
||||||
bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
|
bool AllowRuntime, bool AllowExpensiveTripCount,
|
||||||
Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
|
unsigned TripMultiple, LoopInfo *LI, Pass *PP,
|
||||||
|
LPPassManager *LPM, AssumptionCache *AC) {
|
||||||
BasicBlock *Preheader = L->getLoopPreheader();
|
BasicBlock *Preheader = L->getLoopPreheader();
|
||||||
if (!Preheader) {
|
if (!Preheader) {
|
||||||
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
|
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
|
||||||
@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
|||||||
// flag is specified.
|
// flag is specified.
|
||||||
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
|
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
|
||||||
|
|
||||||
if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
|
if (RuntimeTripCount &&
|
||||||
|
!UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Notify ScalarEvolution that the loop will be substantially changed,
|
// Notify ScalarEvolution that the loop will be substantially changed,
|
||||||
|
@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||||||
/// ...
|
/// ...
|
||||||
/// End:
|
/// End:
|
||||||
///
|
///
|
||||||
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
||||||
|
bool AllowExpensiveTripCount, LoopInfo *LI,
|
||||||
LPPassManager *LPM) {
|
LPPassManager *LPM) {
|
||||||
// for now, only unroll loops that contain a single exit
|
// for now, only unroll loops that contain a single exit
|
||||||
if (!L->getExitingBlock())
|
if (!L->getExitingBlock())
|
||||||
@ -312,6 +313,12 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
|||||||
if (isa<SCEVCouldNotCompute>(TripCountSC))
|
if (isa<SCEVCouldNotCompute>(TripCountSC))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
BasicBlock *Header = L->getHeader();
|
||||||
|
const DataLayout &DL = Header->getModule()->getDataLayout();
|
||||||
|
SCEVExpander Expander(*SE, DL, "loop-unroll");
|
||||||
|
if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
|
||||||
|
return false;
|
||||||
|
|
||||||
// We only handle cases when the unroll factor is a power of 2.
|
// We only handle cases when the unroll factor is a power of 2.
|
||||||
// Count is the loop unroll factor, the number of extra copies added + 1.
|
// Count is the loop unroll factor, the number of extra copies added + 1.
|
||||||
if (!isPowerOf2_32(Count))
|
if (!isPowerOf2_32(Count))
|
||||||
@ -332,18 +339,15 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
|||||||
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
|
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
|
||||||
|
|
||||||
BasicBlock *PH = L->getLoopPreheader();
|
BasicBlock *PH = L->getLoopPreheader();
|
||||||
BasicBlock *Header = L->getHeader();
|
|
||||||
BasicBlock *Latch = L->getLoopLatch();
|
BasicBlock *Latch = L->getLoopLatch();
|
||||||
// It helps to splits the original preheader twice, one for the end of the
|
// It helps to splits the original preheader twice, one for the end of the
|
||||||
// prolog code and one for a new loop preheader
|
// prolog code and one for a new loop preheader
|
||||||
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
|
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
|
||||||
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
|
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
|
||||||
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
||||||
const DataLayout &DL = Header->getModule()->getDataLayout();
|
|
||||||
|
|
||||||
// Compute the number of extra iterations required, which is:
|
// Compute the number of extra iterations required, which is:
|
||||||
// extra iterations = run-time trip count % (loop unroll factor + 1)
|
// extra iterations = run-time trip count % (loop unroll factor + 1)
|
||||||
SCEVExpander Expander(*SE, DL, "loop-unroll");
|
|
||||||
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
|
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
|
||||||
PreHeaderBR);
|
PreHeaderBR);
|
||||||
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
|
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
|
||||||
|
@ -0,0 +1,27 @@
|
|||||||
|
; RUN: opt -S -unroll-runtime -loop-unroll < %s | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
|
;; Check that we don't emit expensive instructions to compute trip
|
||||||
|
;; counts when unrolling loops.
|
||||||
|
|
||||||
|
define i32 @test(i64 %v12, i8* %array, i64* %loc) {
|
||||||
|
; CHECK-LABEL: @test(
|
||||||
|
; CHECK-NOT: udiv
|
||||||
|
entry:
|
||||||
|
%step = load i64, i64* %loc, !range !0
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop: ; preds = %entry, %loop
|
||||||
|
%k.015 = phi i64 [ %v15, %loop ], [ %v12, %entry ]
|
||||||
|
%v14 = getelementptr inbounds i8, i8* %array, i64 %k.015
|
||||||
|
store i8 0, i8* %v14
|
||||||
|
%v15 = add nuw nsw i64 %k.015, %step
|
||||||
|
%v16 = icmp slt i64 %v15, 8193
|
||||||
|
br i1 %v16, label %loop, label %loopexit
|
||||||
|
|
||||||
|
loopexit: ; preds = %loop
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
|
!0 = !{i64 1, i64 100}
|
@ -1,5 +1,7 @@
|
|||||||
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
|
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
; Tests for unrolling loops with run-time trip counts
|
; Tests for unrolling loops with run-time trip counts
|
||||||
|
|
||||||
; CHECK: %xtraiter = and i32 %n
|
; CHECK: %xtraiter = and i32 %n
|
||||||
|
@ -20,7 +20,8 @@ loop1:
|
|||||||
br label %loop2.header
|
br label %loop2.header
|
||||||
|
|
||||||
loop2.header:
|
loop2.header:
|
||||||
br label %loop2
|
%e = icmp uge i32 %iter, 1
|
||||||
|
br i1 %e, label %loop2, label %exit2
|
||||||
|
|
||||||
loop2:
|
loop2:
|
||||||
%iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
|
%iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
|
||||||
|
Reference in New Issue
Block a user