Loosen up IV reuse to allow reusing an IV of the same stride but a larger type when truncating from the larger type to the smaller type is free.

e.g.
Turns this loop:
LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
        movw    %dx, %si
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %edi
        movw    %si, (%edi)
        movl    L_Y$non_lazy_ptr, %edi
        movw    %dx, (%edi)
		addw    $4, %dx
		incw    %si
		incl    %ecx
		cmpl    %eax, %ecx
		jne     LBB1_2  # bb
	
into

LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %esi
        movw    %cx, (%esi)
        movl    L_Y$non_lazy_ptr, %esi
        movw    %dx, (%esi)
        addw    $4, %dx
		incl    %ecx
        cmpl    %eax, %ecx
        jne     LBB1_2  # bb


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43375 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2007-10-26 01:56:11 +00:00
parent 081fdf238b
commit 2bd122c4d9
6 changed files with 77 additions and 37 deletions

View File

@ -104,7 +104,7 @@ public:
/// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
/// srl/add/sra. /// srl/add/sra.
bool isPow2DivCheap() const { return Pow2DivIsCheap; } bool isPow2DivCheap() const { return Pow2DivIsCheap; }
/// getSetCCResultTy - Return the ValueType of the result of setcc operations. /// getSetCCResultTy - Return the ValueType of the result of setcc operations.
/// ///
MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; } MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; }
@ -994,6 +994,13 @@ public:
/// TODO: Handle pre/postinc as well. /// TODO: Handle pre/postinc as well.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const;
/// isTruncateFree - Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
/// register EAX to i16 by referencing its sub-register AX.
/// This default implementation reports that no truncation is free; the
/// hook is virtual so a target can override it with its own answer.
virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const {
// Conservative default: never claim a truncation is free.
return false;
}
//===--------------------------------------------------------------------===// //===--------------------------------------------------------------------===//
// Div utility functions // Div utility functions
// //

View File

@ -339,20 +339,18 @@ void foo(int N) {
for (i = 0; i < N; i++) { X = i; Y = i*4; } for (i = 0; i < N; i++) { X = i; Y = i*4; }
} }
LBB1_1: #bb.preheader LBB1_1: # entry.bb_crit_edge
xorl %ecx, %ecx xorl %ecx, %ecx
xorw %dx, %dx xorw %dx, %dx
LBB1_2: #bb LBB1_2: # bb
movl L_X$non_lazy_ptr, %esi movl L_X$non_lazy_ptr, %esi
movw %dx, (%esi) movw %cx, (%esi)
movw %dx, %si movl L_Y$non_lazy_ptr, %esi
shlw $2, %si movw %dx, (%esi)
movl L_Y$non_lazy_ptr, %edi addw $4, %dx
movw %si, (%edi) incl %ecx
incl %ecx cmpl %eax, %ecx
incw %dx jne LBB1_2 # bb
cmpl %eax, %ecx
jne LBB1_2 #bb
vs. vs.
@ -367,11 +365,7 @@ L4:
cmpl %edx, %edi cmpl %edx, %edi
jne L4 jne L4
There are 3 issues: This is due to the lack of post regalloc LICM.
1. Lack of post regalloc LICM.
2. LSR unable to reused IV for a different type (i16 vs. i32) even though
the cast would be free.
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//

View File

@ -5122,6 +5122,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
} }
/// isTruncateFree - Return true if truncating a value of type Ty1 to type Ty2
/// is considered free on x86. Only integer-to-integer truncations qualify,
/// and only when Ty1 is strictly wider (in primitive bits) than Ty2.
/// NOTE(review): every narrowing integer truncation is reported as free,
/// regardless of the specific widths involved -- confirm this holds for all
/// type pairs callers may pass.
bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
// Anything involving a non-integer type is never reported as free.
if (!Ty1->isInteger() || !Ty2->isInteger())
return false;
// Free only when the source is strictly wider than the destination;
// equal or widening sizes are not truncations.
return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
}
/// isShuffleMaskLegal - Targets can use this to indicate that they only /// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values

View File

@ -359,6 +359,11 @@ namespace llvm {
/// by AM is legal for this target, for a load/store of the specified type. /// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
/// isTruncateFree - Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
/// register EAX to i16 by referencing its sub-register AX.
virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
/// isShuffleMaskLegal - Targets can use this to indicate that they only /// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// By default, if a target supports the VECTOR_SHUFFLE node, all mask

View File

@ -178,7 +178,7 @@ private:
bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse, bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride); const SCEVHandle *&CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
unsigned CheckForIVReuse(bool, const SCEVHandle&, unsigned CheckForIVReuse(bool, bool, const SCEVHandle&,
IVExpr&, const Type*, IVExpr&, const Type*,
const std::vector<BasedUser>& UsersToProcess); const std::vector<BasedUser>& UsersToProcess);
bool ValidStride(bool, int64_t, bool ValidStride(bool, int64_t,
@ -980,15 +980,17 @@ bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
/// RequiresTypeConversion - Returns true if converting Ty to NewTy is not /// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
/// a nop. /// a nop.
bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty, bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
const Type *NewTy) { const Type *Ty2) {
if (Ty == NewTy) if (Ty1 == Ty2)
return false; return false;
return (!Ty->canLosslesslyBitCastTo(NewTy) && if (TLI && TLI->isTruncateFree(Ty1, Ty2))
!(isa<PointerType>(NewTy) && return false;
Ty->canLosslesslyBitCastTo(UIntPtrTy)) && return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
!(isa<PointerType>(Ty) && !(isa<PointerType>(Ty2) &&
NewTy->canLosslesslyBitCastTo(UIntPtrTy))); Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
!(isa<PointerType>(Ty1) &&
Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
} }
/// CheckForIVReuse - Returns the multiple if the stride is the multiple /// CheckForIVReuse - Returns the multiple if the stride is the multiple
@ -997,20 +999,23 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
/// this stride to be rewritten as prev iv * factor. It returns 0 if no /// this stride to be rewritten as prev iv * factor. It returns 0 if no
/// reuse is possible. /// reuse is possible.
unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
const SCEVHandle &Stride, const SCEVHandle &Stride,
IVExpr &IV, const Type *Ty, IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) { const std::vector<BasedUser>& UsersToProcess) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue(); int64_t SInt = SC->getValue()->getSExtValue();
if (SInt == 1) return 0;
for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(), for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
SE = IVsByStride.end(); SI != SE; ++SI) { SE = IVsByStride.end(); SI != SE; ++SI) {
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SInt != -SSInt && if (SI->first != Stride &&
(unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0)) (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
continue; continue;
int64_t Scale = SInt / SSInt; int64_t Scale = SInt / SSInt;
// When scale is 1, we don't need to worry about whether the
// multiplication can be folded into the addressing mode.
if (!AllUsesAreAddresses && Scale != 1)
continue;
// Check that this stride is valid for all the types used for loads and // Check that this stride is valid for all the types used for loads and
// stores; if it can be used for some and not others, we might as well use // stores; if it can be used for some and not others, we might as well use
// the original stride everywhere, since we have to create the IV for it // the original stride everywhere, since we have to create the IV for it
@ -1021,7 +1026,7 @@ unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// FIXME: Only handle base == 0 for now. // FIXME: Only handle base == 0 for now.
// Only reuse previous IV if it would not require a type conversion. // Only reuse previous IV if it would not require a type conversion.
if (isZero(II->Base) && if (isZero(II->Base) &&
!RequiresTypeConversion(II->Base->getType(),Ty)) { !RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II; IV = *II;
return Scale; return Scale;
} }
@ -1183,10 +1188,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
SE->getIntegerSCEV(0, Type::Int32Ty), SE->getIntegerSCEV(0, Type::Int32Ty),
0, 0); 0, 0);
unsigned RewriteFactor = 0; unsigned RewriteFactor = 0;
if (AllUsesAreAddresses) RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
RewriteFactor = CheckForIVReuse(HaveCommonExprs, Stride, ReuseIV, Stride, ReuseIV, CommonExprs->getType(),
CommonExprs->getType(), UsersToProcess);
UsersToProcess);
if (RewriteFactor != 0) { if (RewriteFactor != 0) {
DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
<< " and BASE " << *ReuseIV.Base << " :\n"; << " and BASE " << *ReuseIV.Base << " :\n";

View File

@ -0,0 +1,23 @@
; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
@X = weak global i16 0 ; <i16*> [#uses=1]
@Y = weak global i16 0 ; <i16*> [#uses=1]
; @foo - Loop over an i32 counter from 0 until it equals %N (entered only when
; %N > 0). Each iteration truncates the counter to i16, stores that value to
; @X, and stores the same value shifted left by 2 to @Y. Both stores are
; volatile. The i16 values are derived from the i32 counter via trunc, which
; is the pattern the RUN line's single-match check is applied to.
define void @foo(i32 %N) {
entry:
; Skip the loop entirely when %N is not positive.
%tmp1019 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
br i1 %tmp1019, label %bb, label %return
bb: ; preds = %bb, %entry
%i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
; Narrow the i32 counter to i16; this value feeds both stores below.
%tmp1 = trunc i32 %i.014.0 to i16 ; <i16> [#uses=2]
volatile store i16 %tmp1, i16* @X, align 2
; Counter * 4, computed in i16.
%tmp34 = shl i16 %tmp1, 2 ; <i16> [#uses=1]
volatile store i16 %tmp34, i16* @Y, align 2
%indvar.next = add i32 %i.014.0, 1 ; <i32> [#uses=2]
; Exit once the advanced counter equals %N.
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
return: ; preds = %bb, %entry
ret void
}