Loosen up iv reuse to allow reuse of the same stride but a larger type when truncating from the larger type to smaller type is free.
e.g. Turns this loop:

LBB1_1: # entry.bb_crit_edge
	xorl	%ecx, %ecx
	xorw	%dx, %dx
	movw	%dx, %si
LBB1_2: # bb
	movl	L_X$non_lazy_ptr, %edi
	movw	%si, (%edi)
	movl	L_Y$non_lazy_ptr, %edi
	movw	%dx, (%edi)
	addw	$4, %dx
	incw	%si
	incl	%ecx
	cmpl	%eax, %ecx
	jne	LBB1_2	# bb

into

LBB1_1: # entry.bb_crit_edge
	xorl	%ecx, %ecx
	xorw	%dx, %dx
LBB1_2: # bb
	movl	L_X$non_lazy_ptr, %esi
	movw	%cx, (%esi)
	movl	L_Y$non_lazy_ptr, %esi
	movw	%dx, (%esi)
	addw	$4, %dx
	incl	%ecx
	cmpl	%eax, %ecx
	jne	LBB1_2	# bb

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43375 91177308-0d34-0410-b5e6-96231b3b80d8
parent 081fdf238b
commit 2bd122c4d9
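For reference, a sketch of the C source behind these loops (the loop shape comes from the README example updated below; the 16-bit widths of X and Y match the new test case, and the casts are only there to make the narrowing explicit):

short X, Y;   // 16-bit globals, as in @X and @Y in the test below

void foo(int N) {
  for (int i = 0; i < N; i++) {
    X = (short)i;        // i32 -> i16: free on x86 via the sub-register
    Y = (short)(i * 4);  // same induction variable, scaled by 4
  }
}

Before the change, the i16 store to X kept its own 16-bit counter (%si, maintained with movw/incw); after it, that store simply reads %cx, the 16-bit sub-register of the 32-bit induction variable, so the extra counter disappears.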
include/llvm/Target/TargetLowering.h

@@ -104,7 +104,7 @@ public:
   /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
   /// srl/add/sra.
   bool isPow2DivCheap() const { return Pow2DivIsCheap; }
 
   /// getSetCCResultTy - Return the ValueType of the result of setcc operations.
   ///
   MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; }
@@ -994,6 +994,13 @@ public:
   /// TODO: Handle pre/postinc as well.
   virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const;
 
+  /// isTruncateFree - Return true if it's free to truncate a value of
+  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+  /// register EAX to i16 by referencing its sub-register AX.
+  virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   // Div utility functions
   //
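The default returns false, so existing targets keep their behavior until they opt in. A minimal sketch of what an override for a hypothetical target would look like, shown as a free function over the 2007-era Type API (it mirrors the x86 implementation added later in this commit):

#include "llvm/Type.h"
using namespace llvm;

// Hypothetical policy: truncation is free exactly when both types are
// integers and the source is strictly wider (e.g. when the narrow value
// lives in a sub-register of the wide one).
static bool myTargetIsTruncateFree(const Type *Ty1, const Type *Ty2) {
  if (!Ty1->isInteger() || !Ty2->isInteger())
    return false;
  return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
}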
lib/Target/X86/README.txt

@@ -339,20 +339,18 @@ void foo(int N) {
 for (i = 0; i < N; i++) { X = i; Y = i*4; }
 }
 
-LBB1_1: #bb.preheader
+LBB1_1: # entry.bb_crit_edge
 	xorl	%ecx, %ecx
 	xorw	%dx, %dx
-LBB1_2: #bb
+LBB1_2: # bb
 	movl	L_X$non_lazy_ptr, %esi
-	movw	%dx, (%esi)
-	movw	%dx, %si
-	shlw	$2, %si
-	movl	L_Y$non_lazy_ptr, %edi
-	movw	%si, (%edi)
+	movw	%cx, (%esi)
+	movl	L_Y$non_lazy_ptr, %esi
+	movw	%dx, (%esi)
+	addw	$4, %dx
 	incl	%ecx
-	incw	%dx
 	cmpl	%eax, %ecx
-	jne	LBB1_2 #bb
+	jne	LBB1_2 # bb
 
 vs.
 
@@ -367,11 +365,7 @@ L4:
 	cmpl %edx, %edi
 	jne L4
 
-There are 3 issues:
+This is due to the lack of post regalloc LICM.
 
-1. Lack of post regalloc LICM.
-2. LSR unable to reused IV for a different type (i16 vs. i32) even though
-   the cast would be free.
-
 //===---------------------------------------------------------------------===//
 
lib/Target/X86/X86ISelLowering.cpp

@@ -5122,6 +5122,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
 }
 
 
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+  if (!Ty1->isInteger() || !Ty2->isInteger())
+    return false;
+  return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
+}
+
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
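Behaviorally, the override answers a pure width question. A standalone model with bit widths standing in for the Type queries (a sketch for intuition, with 0 marking a non-integer type; not the real API):

#include <cassert>

static bool x86TruncateFreeModel(unsigned Bits1, unsigned Bits2) {
  if (Bits1 == 0 || Bits2 == 0)
    return false;            // only integer -> integer truncates qualify
  return Bits1 > Bits2;      // strictly narrowing, e.g. EAX -> AX
}

int main() {
  assert( x86TruncateFreeModel(32, 16));  // i32 -> i16: free sub-register read
  assert(!x86TruncateFreeModel(16, 32));  // widening needs a real extend
  assert(!x86TruncateFreeModel(32, 32));  // same width is not a truncate
  assert(!x86TruncateFreeModel(0, 16));   // e.g. f64 source: not integer
  return 0;
}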
lib/Target/X86/X86ISelLowering.h

@@ -359,6 +359,11 @@ namespace llvm {
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
 
+    /// isTruncateFree - Return true if it's free to truncate a value of
+    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+    /// register EAX to i16 by referencing its sub-register AX.
+    virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+
     /// isShuffleMaskLegal - Targets can use this to indicate that they only
     /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
     /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
lib/Transforms/Scalar/LoopStrengthReduce.cpp

@@ -178,7 +178,7 @@ private:
     bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
                        const SCEVHandle *&CondStride);
     bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
-    unsigned CheckForIVReuse(bool, const SCEVHandle&,
+    unsigned CheckForIVReuse(bool, bool, const SCEVHandle&,
                             IVExpr&, const Type*,
                             const std::vector<BasedUser>& UsersToProcess);
    bool ValidStride(bool, int64_t,
@@ -980,15 +980,17 @@ bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
 
 /// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
 /// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
-                                                const Type *NewTy) {
-  if (Ty == NewTy)
+bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
+                                                const Type *Ty2) {
+  if (Ty1 == Ty2)
     return false;
-  return (!Ty->canLosslesslyBitCastTo(NewTy) &&
-          !(isa<PointerType>(NewTy) &&
-            Ty->canLosslesslyBitCastTo(UIntPtrTy)) &&
-          !(isa<PointerType>(Ty) &&
-            NewTy->canLosslesslyBitCastTo(UIntPtrTy)));
+  if (TLI && TLI->isTruncateFree(Ty1, Ty2))
+    return false;
+  return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
+          !(isa<PointerType>(Ty2) &&
+            Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
+          !(isa<PointerType>(Ty1) &&
+            Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
 }
 
 /// CheckForIVReuse - Returns the multiple if the stride is the multiple
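The net effect on x86: rewriting i16 uses in terms of an existing i32 IV no longer counts as a type conversion, because TLI reports the truncate as free; the widening direction still falls through to the old bitcast-based checks.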
@@ -997,20 +999,23 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
 /// this stride to be rewritten as prev iv * factor. It returns 0 if no
 /// reuse is possible.
 unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+                                             bool AllUsesAreAddresses,
                                              const SCEVHandle &Stride,
                                              IVExpr &IV, const Type *Ty,
                                 const std::vector<BasedUser>& UsersToProcess) {
   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
     int64_t SInt = SC->getValue()->getSExtValue();
-    if (SInt == 1) return 0;
-
     for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
            SE = IVsByStride.end(); SI != SE; ++SI) {
       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
-      if (SInt != -SSInt &&
+      if (SI->first != Stride &&
           (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
         continue;
       int64_t Scale = SInt / SSInt;
+      // When scale is 1, we don't need to worry about whether the
+      // multiplication can be folded into the addressing mode.
+      if (!AllUsesAreAddresses && Scale != 1)
+        continue;
       // Check that this stride is valid for all the types used for loads and
       // stores; if it can be used for some and not others, we might as well use
       // the original stride everywhere, since we have to create the IV for it
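The arithmetic of the reuse test is easy to lose in the iterator plumbing. A standalone model of the decision (a sketch that mirrors the logic, not the pass's data structures; SameStride stands in for the new SI->first != Stride early accept):

#include <cassert>
#include <cstdint>

// Factor by which an existing IV of stride SSInt can serve a use of
// stride SInt; 0 means no reuse.
static int64_t reuseFactor(int64_t SInt, int64_t SSInt, bool SameStride,
                           bool AllUsesAreAddresses) {
  int64_t AbsSInt = SInt < 0 ? -SInt : SInt;
  if (!SameStride && (AbsSInt < SSInt || SInt % SSInt != 0))
    return 0;                       // not an integral multiple of SSInt
  int64_t Scale = SInt / SSInt;
  // New in this commit: a scaled reuse only pays off when the multiply
  // can be folded into an addressing mode.
  if (!AllUsesAreAddresses && Scale != 1)
    return 0;
  return Scale;
}

int main() {
  assert(reuseFactor(4, 1, false, true)  == 4); // fold x4 into the address
  assert(reuseFactor(4, 1, false, false) == 0); // non-address use: reject
  assert(reuseFactor(4, 4, true,  false) == 1); // same stride, any use kind
  return 0;
}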
@@ -1021,7 +1026,7 @@ unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
       // FIXME: Only handle base == 0 for now.
       // Only reuse previous IV if it would not require a type conversion.
       if (isZero(II->Base) &&
-          !RequiresTypeConversion(II->Base->getType(),Ty)) {
+          !RequiresTypeConversion(II->Base->getType(), Ty)) {
         IV = *II;
         return Scale;
       }
@@ -1183,10 +1188,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                      SE->getIntegerSCEV(0, Type::Int32Ty),
                      0, 0);
   unsigned RewriteFactor = 0;
-  if (AllUsesAreAddresses)
-    RewriteFactor = CheckForIVReuse(HaveCommonExprs, Stride, ReuseIV,
-                                    CommonExprs->getType(),
-                                    UsersToProcess);
+  RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+                                  Stride, ReuseIV, CommonExprs->getType(),
+                                  UsersToProcess);
   if (RewriteFactor != 0) {
     DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
          << " and BASE " << *ReuseIV.Base << " :\n";
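Note the matching call-site change: CheckForIVReuse was previously attempted only when every use was an address, but is now called unconditionally, with AllUsesAreAddresses passed down so that only the Scale != 1 case retains that restriction.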
test/CodeGen/X86/loop-strength-reduce5.ll (new file, 23 lines)
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+
+@X = weak global i16 0		; <i16*> [#uses=1]
+@Y = weak global i16 0		; <i16*> [#uses=1]
+
+define void @foo(i32 %N) {
+entry:
+	%tmp1019 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
+	br i1 %tmp1019, label %bb, label %return
+
+bb:		; preds = %bb, %entry
+	%i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
+	%tmp1 = trunc i32 %i.014.0 to i16		; <i16> [#uses=2]
+	volatile store i16 %tmp1, i16* @X, align 2
+	%tmp34 = shl i16 %tmp1, 2		; <i16> [#uses=1]
+	volatile store i16 %tmp34, i16* @Y, align 2
+	%indvar.next = add i32 %i.014.0, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb, %entry
+	ret void
+}
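The RUN line encodes the expected win: the old code kept two loop counters (incw for the i16 IV, incl for the i32 one), while with reuse only the single incl survives, hence grep inc | count 1.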