scev: Better sign-extend removal. Normalize postincrement recurrences
so that their sign extended forms are congruent when no overflow occurs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132360 91177308-0d34-0410-b5e6-96231b3b80d8
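
For illustration only (not part of the patch): the congruence the commit message relies on can be checked on plain integers. When the pre-increment value can be incremented by the step without signed overflow, sign-extending the post-increment value gives the same result as sign-extending the pre-increment value and adding the step in the wider type. A minimal C++ sketch, with hypothetical helper names:

#include <cassert>
#include <cstdint>
#include <limits>

// Returns true if x + step would overflow the signed 32-bit range.
static bool addOverflows(int32_t x, int32_t step) {
  int64_t wide = static_cast<int64_t>(x) + static_cast<int64_t>(step);
  return wide < std::numeric_limits<int32_t>::min() ||
         wide > std::numeric_limits<int32_t>::max();
}

int main() {
  const int32_t step = 4;
  for (int32_t x : {-100, 0, 7, 2147483640}) {
    if (addOverflows(x, step))
      continue; // the congruence is only claimed when no overflow occurs
    int32_t post = x + step; // post-increment value in the narrow type
    // sext(post-inc) == sext(pre-inc) + step in the wide type; this is what
    // lets the post-increment recurrence share the widened pre-increment IV.
    assert(static_cast<int64_t>(post) ==
           static_cast<int64_t>(x) + static_cast<int64_t>(step));
  }
  return 0;
}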
@@ -1035,6 +1035,93 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
   return S;
 }
 
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+                                           ICmpInst::Predicate *Pred,
+                                           ScalarEvolution *SE) {
+  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+  if (SE->isKnownPositive(Step)) {
+    *Pred = ICmpInst::ICMP_SLT;
+    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+                           SE->getSignedRange(Step).getSignedMax());
+  }
+  if (SE->isKnownNegative(Step)) {
+    *Pred = ICmpInst::ICMP_SGT;
+    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+                           SE->getSignedRange(Step).getSignedMin());
+  }
+  return 0;
+}
+
+// The recurrence AR has been shown to have no signed wrap. Typically, if we can
+// prove NSW for AR, then we can just as easily prove NSW for its preincrement
+// or postincrement sibling. This allows normalizing a sign extended AddRec as
+// such: {sext(Step + Start),+,Step} => {Step + sext(Start),+,Step}. As a
+// result, the expression "Step + sext(PreIncAR)" is congruent with
+// "sext(PostIncAR)".
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+                                            const Type *Ty,
+                                            ScalarEvolution *SE) {
+  const Loop *L = AR->getLoop();
+  const SCEV *Start = AR->getStart();
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // Check for a simple looking step prior to loop entry.
+  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+  if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+    return 0;
+
+  // This is a postinc AR. Check for overflow on the preinc recurrence using the
+  // same three conditions that getSignExtendExpr checks.
+
+  // 1. NSW flags on the step increment.
+  const SCEV *PreStart = SA->getOperand(1);
+  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+    SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+  if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW)) {
+    return PreStart;
+  }
+
+  // 2. Direct overflow check on the step operation's expression.
+  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+  const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+  const SCEV *OperandExtendedStart =
+    SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+                   SE->getSignExtendExpr(Step, WideTy));
+  if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+    // Cache knowledge of PreAR NSW.
+    if (PreAR)
+      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+    // FIXME: this optimization needs a unit test
+    DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+    return PreStart;
+  }
+
+  // 3. Loop precondition.
+  ICmpInst::Predicate Pred;
+  const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+  if (SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+    return PreStart;
+  }
+  return 0;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+                                            const Type *Ty,
+                                            ScalarEvolution *SE) {
+  const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+  if (!PreStart)
+    return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+  return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+                        SE->getSignExtendExpr(PreStart, Ty));
+}
+
 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
                                                const Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
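A standalone sketch of the arithmetic behind getOverflowLimitForStep, using plain 32-bit integers instead of the SCEV and APInt APIs (the helper name below is made up for illustration): for a known-positive step, the limit is SignedMin minus the step's maximum, computed with wrapping subtraction, and any value that compares signed-less-than the limit can be incremented without signed overflow. The symmetric bound applies for a known-negative step.

#include <cassert>
#include <cstdint>

// Overflow limit for a known-positive 32-bit step: INT32_MIN - step, computed
// with unsigned (wrapping) arithmetic, mirroring the APInt subtraction above.
// Any value v with v < limit (signed compare) can be incremented by step
// without signed overflow.
static int32_t overflowLimitForPositiveStep(int32_t step) {
  uint32_t signedMin = 0x80000000u; // bit pattern of INT32_MIN
  return static_cast<int32_t>(signedMin - static_cast<uint32_t>(step));
}

int main() {
  // step == 1: the limit is INT32_MAX, so v < INT32_MAX guarantees that
  // v + 1 does not wrap.
  assert(overflowLimitForPositiveStep(1) == 2147483647);

  const int32_t step = 16;
  const int32_t limit = overflowLimitForPositiveStep(step); // INT32_MAX - 15
  for (int32_t v : {0, 12345, limit - 1}) {
    assert(v < limit);
    assert(static_cast<int64_t>(v) + step <= 2147483647); // no signed wrap
  }
  return 0;
}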
@@ -1097,7 +1184,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
       // If we have special knowledge that this addrec won't overflow,
       // we don't need to do any further analysis.
       if (AR->getNoWrapFlags(SCEV::FlagNSW))
-        return getAddRecExpr(getSignExtendExpr(Start, Ty),
+        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                              getSignExtendExpr(Step, Ty),
                              L, SCEV::FlagNSW);
 
@@ -1133,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
           // Cache knowledge of AR NSW, which is propagated to this AddRec.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
+          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                getSignExtendExpr(Step, Ty),
                                L, AR->getNoWrapFlags());
         }
@@ -1149,7 +1236,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
           // Cache knowledge of AR NSW, which is propagated to this AddRec.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
+          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                getZeroExtendExpr(Step, Ty),
                                L, AR->getNoWrapFlags());
         }
@@ -1159,34 +1246,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
       // the addrec is safe. Also, if the entry is guarded by a comparison
       // with the start value and the backedge is guarded by a comparison
       // with the post-inc value, the addrec is safe.
-      if (isKnownPositive(Step)) {
-        const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
-                                    getSignedRange(Step).getSignedMax());
-        if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
-            (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
-             isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
-                                         AR->getPostIncExpr(*this), N))) {
-          // Cache knowledge of AR NSW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-          // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
-        }
-      } else if (isKnownNegative(Step)) {
-        const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
-                                    getSignedRange(Step).getSignedMin());
-        if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
-            (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
-             isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
-                                         AR->getPostIncExpr(*this), N))) {
-          // Cache knowledge of AR NSW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-          // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
-        }
-      }
+      ICmpInst::Predicate Pred;
+      const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+      if (OverflowLimit &&
+          (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+           (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+            isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+                                        OverflowLimit)))) {
+        // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+                             getSignExtendExpr(Step, Ty),
+                             L, AR->getNoWrapFlags());
+      }
     }
   }
@@ -2,9 +2,8 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
-; Test reusing the same IV with constant start for preinc and postinc values
-; with and without NSW.
-; IV rewrite only removes one sext. WidenIVs should remove all three.
+; IV with constant start, preinc and postinc sign extends, with and without NSW.
+; IV rewrite only removes one sext. WidenIVs removes all three.
 define void @postincConstIV(i8* %base, i32 %limit) nounwind {
 entry:
   br label %loop
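The test comments above say that IV rewrite removes only one sign extend while WidenIVs removes all of them. At the source level, the widening transform corresponds, roughly, to replacing a 32-bit induction variable that is sign-extended for addressing on every iteration with a single 64-bit induction variable. A hand-written C++ analogy, loosely modeled on @postincConstIV and not the actual output of the pass:

#include <cstdint>

// Before widening: a 32-bit IV that is sign-extended to 64 bits on every
// iteration to form the address (the "sext" the CHECK lines look for).
void beforeWidening(int8_t *base, int32_t limit) {
  for (int32_t iv = 0; iv < limit; ++iv) {
    int64_t ofs = static_cast<int64_t>(iv); // sext i32 -> i64
    base[ofs] = 0;
  }
}

// After widening: a single 64-bit IV and no per-iteration extension. This is
// only sound because the narrow IV provably does not wrap (NSW).
void afterWidening(int8_t *base, int32_t limit) {
  for (int64_t iv = 0, wideLimit = limit; iv < wideLimit; ++iv)
    base[iv] = 0;
}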
@@ -33,21 +32,19 @@ return:
   ret void
 }
 
-; Test reusing the same IV with nonconstant start for preinc and postinc values
-; with and without NSW.
-; As with constant IV start, WidenIVs should remove all three.
-;
-; FIXME: WidenIVs should remove %postofs just like %postofsnsw
+; IV with nonconstant start, preinc and postinc sign extends,
+; with and without NSW.
+; As with postincConstIV, WidenIVs removes all three sexts.
 define void @postincVarIV(i8* %base, i32 %init, i32 %limit) nounwind {
 entry:
-  br label %loop
+  %precond = icmp sgt i32 %limit, %init
+  br i1 %precond, label %loop, label %return
 ; CHECK: loop:
-; CHECK: sext
 ; CHECK-NOT: sext
 ; CHECK: exit:
 loop:
   %iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
-  %ivnsw = phi i32 [ %postivnsw, %loop ], [ 0, %entry ]
+  %ivnsw = phi i32 [ %postivnsw, %loop ], [ %init, %entry ]
   %preofs = sext i32 %iv to i64
   %preadr = getelementptr i8* %base, i64 %preofs
   store i8 0, i8* %preadr
@@ -59,7 +56,7 @@ loop:
   %postofsnsw = sext i32 %postivnsw to i64
   %postadrnsw = getelementptr i8* %base, i64 %postofsnsw
   store i8 0, i8* %postadrnsw
-  %cond = icmp sgt i32 %limit, %iv
+  %cond = icmp sgt i32 %limit, %postiv
   br i1 %cond, label %loop, label %exit
 exit:
   br label %return
@@ -103,15 +100,13 @@ innerpreheader:
 ; CHECK: innerloop:
 ;
 ; Eliminate %ofs2 after widening inneriv.
+; Eliminate %ofs3 after normalizing sext(innerpostiv)
 ; CHECK-NOT: sext
 ; CHECK: getelementptr
 ;
-; FIXME: We should not increase the number of IVs in this loop.
-; sext elimination plus LFTR results in 3 final IVs.
-;
-; FIXME: eliminate %ofs3 based the loop pre/post conditions
-; even though innerpostiv is not NSW, thus sign extending innerpostiv
-; does not yield the same expression as incrementing the widened inneriv.
+; FIXME: We should check that indvars does not increase the number of
+; IVs in this loop. sext elimination plus LFTR currently results in 2 final
+; IVs. Waiting to remove LFTR.
 innerloop:
   %inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ]
   %innerpostiv = add i32 %inneriv, 1