From a14d6f1ea5f96172d1d421dec2c6020a60be0729 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 10 Jan 2015 00:30:55 +0000 Subject: [PATCH] [LoopUnroll] Fix the partial unrolling threshold for small loop sizes When we compute the size of a loop, we include the branch on the backedge and the comparison feeding the conditional branch. Under normal circumstances, these don't get replicated with the rest of the loop body when we unroll. This led to the somewhat surprising behavior that really small loops would not get unrolled enough -- they could be unrolled more and the resulting loop would be below the threshold, because we were assuming they'd take (LoopSize * UnrollingFactor) instructions after unrolling, instead of (((LoopSize-2) * UnrollingFactor)+2) instructions. This fixes that computation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225565 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 17 ++++++++++++----- .../LoopUnroll/partial-unroll-optsize.ll | 19 ++++++++++++++++++- test/Transforms/LoopUnroll/runtime-loop2.ll | 2 +- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 4467112a181..fef52107f62 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -222,8 +222,11 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's - // not a problem for code quality. - if (LoopSize == 0) LoopSize = 1; + // not a problem for code quality. Also, the code using this size may assume + // that each loop has at least three instructions (likely a conditional + // branch, a comparison feeding that branch, and some kind of loop increment + // feeding that comparison instruction). + LoopSize = std::max(LoopSize, 3u); return LoopSize; } @@ -407,7 +410,11 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, &AC); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); - uint64_t UnrolledSize = (uint64_t)LoopSize * Count; + + // When computing the unrolled size, note that the conditional branch on the + // backedge and the comparison feeding it are not replicated like the rest of + // the loop body (which is why 2 is subtracted). + uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2; if (notDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"); @@ -452,7 +459,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling. - Count = PartialThreshold / LoopSize; + Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2); while (Count != 0 && TripCount % Count != 0) Count--; } @@ -466,7 +473,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // the original count which satisfies the threshold limit. while (Count != 0 && UnrolledSize > PartialThreshold) { Count >>= 1; - UnrolledSize = LoopSize * Count; + UnrolledSize = (LoopSize-2) * Count + 2; } if (Count > UP.MaxCount) Count = UP.MaxCount; diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll index 3179d55e978..a650317f3df 100644 --- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll +++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s ; Loop size = 3, when the function has the optsize attribute, the ; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled -; by 16 times because 3 * 16 < 50. +; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not). define void @unroll_opt_for_size() nounwind optsize { entry: br label %loop @@ -32,4 +32,21 @@ exit: ; CHECK-NEXT: add ; CHECK-NEXT: add ; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add ; CHECK-NEXT: icmp + diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll index 7205c686065..176362a3445 100644 --- a/test/Transforms/LoopUnroll/runtime-loop2.ll +++ b/test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s ; Choose a smaller, power-of-two, unroll count if the loop is too large. ; This test makes sure we're not unrolling 'odd' counts