mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-06 06:33:24 +00:00
Fix a trip-count overflow issue in LoopUnroll.
Currently LoopUnroll generates a prologue loop before the main loop body to execute the first N % UnrollFactor iterations. This loop is also used as the fallback when the trip count can overflow, which is determined by a runtime check. However, we had been mistakenly optimizing this loop into straight-line code for UnrollFactor = 2, not taking into account that it also serves as the safe version of the loop when its trip count overflows. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222451 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d23f04a165
commit
4e7b10b07f
@ -295,6 +295,10 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
||||
if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
|
||||
return false;
|
||||
|
||||
// If BECount is all ones (the unsigned maximum), BECount + 1 wraps, so we can't compute the trip count without overflow.
|
||||
if (BECount->isAllOnesValue())
|
||||
return false;
|
||||
|
||||
// Add 1 since the backedge count doesn't include the first loop iteration
|
||||
const SCEV *TripCountSC =
|
||||
SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
|
||||
@ -357,11 +361,16 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
||||
std::vector<BasicBlock *> NewBlocks;
|
||||
ValueToValueMapTy VMap;
|
||||
|
||||
// If unroll count is 2 and we can't overflow in tripcount computation (which
|
||||
// is BECount + 1), then we don't need a loop for prologue, and we can unroll
|
||||
// it. We can be sure that we don't overflow only if tripcount is a constant.
|
||||
bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
|
||||
|
||||
// Clone all the basic blocks in the loop. If UnrollPrologue is true, we don't clone
|
||||
// the loop, otherwise we create a cloned loop to execute the extra
|
||||
// iterations. This function adds the appropriate CFG connections.
|
||||
CloneLoopBlocks(L, ModVal, Count == 2, PH, PEnd, NewBlocks, LoopBlocks, VMap,
|
||||
LI);
|
||||
CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
|
||||
VMap, LI);
|
||||
|
||||
// Insert the cloned blocks into function just before the original loop
|
||||
F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0],
|
||||
|
@ -3,7 +3,7 @@
|
||||
; This tests that setting the unroll count works
|
||||
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: br label %for.body.preheader.split
|
||||
; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
|
||||
; CHECK: for.body:
|
||||
; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
|
||||
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
|
||||
|
30
test/Transforms/LoopUnroll/tripcount-overflow.ll
Normal file
30
test/Transforms/LoopUnroll/tripcount-overflow.ll
Normal file
@ -0,0 +1,30 @@
|
||||
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; When the prologue is fully unrolled, the branch at its end is unconditional.
|
||||
; Unrolling it is illegal if we can't prove that computing the trip count (backedge count + 1) doesn't overflow,
|
||||
; like in this example, where it comes from an argument.
|
||||
;
|
||||
; This test is based on an example from here:
|
||||
; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
|
||||
;
|
||||
; CHECK: while.body.prol:
|
||||
; CHECK: br i1
|
||||
; CHECK: entry.split:
|
||||
|
||||
; Function Attrs: nounwind readnone ssp uwtable
|
||||
define i32 @foo(i32 %N) #0 {
|
||||
entry:
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %entry
|
||||
%i = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
%cmp = icmp eq i32 %i, %N
|
||||
%inc = add i32 %i, 1
|
||||
br i1 %cmp, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body
|
||||
ret i32 %i
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
Loading…
x
Reference in New Issue
Block a user