mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-27 13:30:05 +00:00
a14d6f1ea5
When we compute the size of a loop, we include the branch on the backedge and the comparison feeding the conditional branch. Under normal circumstances, these don't get replicated with the rest of the loop body when we unroll. This led to the somewhat surprising behavior that really small loops would not get unrolled enough -- they could be unrolled more and the resulting loop would be below the threshold, because we were assuming they'd take (LoopSize * UnrollingFactor) instructions after unrolling, instead of (((LoopSize-2) * UnrollingFactor)+2) instructions. This fixes that computation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225565 91177308-0d34-0410-b5e6-96231b3b80d8
31 lines
1.2 KiB
LLVM
31 lines
1.2 KiB
LLVM
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s
|
|
|
|
; Choose a smaller, power-of-two, unroll count if the loop is too large.
|
|
; This test makes sure we're not unrolling by an 'odd' (non-power-of-two) count.
|
|
|
|
; CHECK: for.body.prol:
|
|
; CHECK: for.body:
|
|
; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
|
|
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
|
|
|
|
; @test(a, n): adds up the i32 elements a[0] .. a[n-1] and returns the total
; (0 when n == 0). The trip count comes from the %n argument, so it is not a
; constant inside this function.
; The block and value names below are what the FileCheck directives at the top
; of the file match against (with suffixes added in the output), so they must
; not be renamed.
; NOTE(review): the bare "|" lines throughout look like page-extraction
; residue rather than LLVM IR -- confirm against the upstream test file.
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
|
entry:
|
|
; Guard: when n == 0 the loop is skipped and for.end yields 0.
%cmp1 = icmp eq i32 %n, 0
|
|
br i1 %cmp1, label %for.end, label %for.body
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
; 64-bit induction variable: 0 on entry, previous value + 1 on the backedge.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
|
; Running sum: 0 on entry, previous %add on the backedge.
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
|
|
; Address of a[indvars.iv].
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
|
|
%0 = load i32* %arrayidx, align 4
|
|
; sum += a[indvars.iv]
%add = add nsw i32 %0, %sum.02
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
; The incremented index is narrowed to i32 so it can be compared with %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
; Exit once the narrowed next index equals n.
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
|
; Backedge: this compare and branch are the loop's only exit test.
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; Result: 0 if the loop was skipped, otherwise the final %add.
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
|
ret i32 %sum.0.lcssa
|
|
}
|