mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-17 06:33:21 +00:00
Cost Model: Move the 'max unroll factor' variable to the TTI and add initial Cost Model support on ARM.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171928 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d700a2f9c5
commit
83be7b0dd3
@ -148,6 +148,11 @@ public:
|
|||||||
/// set to false, it returns the number of scalar registers.
|
/// set to false, it returns the number of scalar registers.
|
||||||
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
||||||
|
|
||||||
|
/// \return The maximum unroll factor that the vectorizer should try to
|
||||||
|
/// perform for this target. This number depends on the level of parallelism
|
||||||
|
/// and the number of execution units in the CPU.
|
||||||
|
virtual unsigned getMaximumUnrollFactor() const;
|
||||||
|
|
||||||
/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
|
/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
|
||||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
||||||
|
|
||||||
|
@ -92,6 +92,10 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
|
|||||||
return PrevTTI->getNumberOfRegisters(Vector);
|
return PrevTTI->getNumberOfRegisters(Vector);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
|
||||||
|
return PrevTTI->getMaximumUnrollFactor();
|
||||||
|
}
|
||||||
|
|
||||||
unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
|
unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
|
||||||
Type *Ty) const {
|
Type *Ty) const {
|
||||||
return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
|
return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
|
||||||
@ -216,6 +220,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
|
|||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned getMaximumUnrollFactor() const {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -83,6 +83,7 @@ public:
|
|||||||
/// @{
|
/// @{
|
||||||
|
|
||||||
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
||||||
|
virtual unsigned getMaximumUnrollFactor() const;
|
||||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
||||||
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
|
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
|
||||||
int Index, Type *SubTp) const;
|
int Index, Type *SubTp) const;
|
||||||
@ -182,6 +183,10 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned BasicTTI::getMaximumUnrollFactor() const {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||||
// Check if any of the operands are vector operands.
|
// Check if any of the operands are vector operands.
|
||||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
|
@ -77,6 +77,31 @@ public:
|
|||||||
virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
|
virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
|
||||||
|
|
||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
|
|
||||||
|
/// \name Vector TTI Implementations
|
||||||
|
/// @{
|
||||||
|
|
||||||
|
unsigned getNumberOfRegisters(bool Vector) const {
|
||||||
|
if (Vector) {
|
||||||
|
if (ST->hasNEON())
|
||||||
|
return 16;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ST->isThumb1Only())
|
||||||
|
return 8;
|
||||||
|
return 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned getMaximumUnrollFactor() const {
|
||||||
|
// These are out of order CPUs:
|
||||||
|
if (ST->isCortexA15() || ST->isSwift())
|
||||||
|
return 2;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
@ -75,7 +75,6 @@ public:
|
|||||||
|
|
||||||
/// \name Scalar TTI Implementations
|
/// \name Scalar TTI Implementations
|
||||||
/// @{
|
/// @{
|
||||||
|
|
||||||
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
|
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
|
||||||
|
|
||||||
/// @}
|
/// @}
|
||||||
@ -84,6 +83,7 @@ public:
|
|||||||
/// @{
|
/// @{
|
||||||
|
|
||||||
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
||||||
|
virtual unsigned getMaximumUnrollFactor() const;
|
||||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
||||||
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
|
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
|
||||||
int Index, Type *SubTp) const;
|
int Index, Type *SubTp) const;
|
||||||
@ -156,7 +156,6 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
|
X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
|
||||||
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
|
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
|
||||||
// TODO: Currently the __builtin_popcount() implementation using SSE3
|
// TODO: Currently the __builtin_popcount() implementation using SSE3
|
||||||
@ -171,6 +170,18 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
|
|||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned X86TTI::getMaximumUnrollFactor() const {
|
||||||
|
if (ST->isAtom())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
// Sandybridge and Haswell have multiple execution ports and pipelined
|
||||||
|
// vector units.
|
||||||
|
if (ST->hasAVX())
|
||||||
|
return 4;
|
||||||
|
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
|
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
|
||||||
|
@ -116,9 +116,6 @@ static const unsigned RuntimeMemoryCheckThreshold = 4;
|
|||||||
/// This is the highest vector width that we try to generate.
|
/// This is the highest vector width that we try to generate.
|
||||||
static const unsigned MaxVectorSize = 8;
|
static const unsigned MaxVectorSize = 8;
|
||||||
|
|
||||||
/// This is the highest Unroll Factor.
|
|
||||||
static const unsigned MaxUnrollSize = 4;
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// Forward declarations.
|
// Forward declarations.
|
||||||
@ -2715,6 +2712,8 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
|
|||||||
UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions));
|
UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions));
|
||||||
|
|
||||||
// Clamp the unroll factor ranges to reasonable factors.
|
// Clamp the unroll factor ranges to reasonable factors.
|
||||||
|
unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
|
||||||
|
|
||||||
if (UF > MaxUnrollSize)
|
if (UF > MaxUnrollSize)
|
||||||
UF = MaxUnrollSize;
|
UF = MaxUnrollSize;
|
||||||
else if (UF < 1)
|
else if (UF < 1)
|
||||||
|
6
test/Transforms/LoopVectorize/ARM/lit.local.cfg
Normal file
6
test/Transforms/LoopVectorize/ARM/lit.local.cfg
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
config.suffixes = ['.ll', '.c', '.cpp']
|
||||||
|
|
||||||
|
targets = set(config.root.targets_to_build.split())
|
||||||
|
if not 'ARM' in targets:
|
||||||
|
config.unsupported = True
|
||||||
|
|
25
test/Transforms/LoopVectorize/ARM/sanity.ll
Normal file
25
test/Transforms/LoopVectorize/ARM/sanity.ll
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
|
||||||
|
target triple = "thumbv7-apple-ios3.0.0"
|
||||||
|
|
||||||
|
; Make sure that we are not crashing on ARM.
|
||||||
|
|
||||||
|
define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
|
||||||
|
%1 = icmp sgt i32 %n, 0
|
||||||
|
br i1 %1, label %.lr.ph, label %._crit_edge
|
||||||
|
|
||||||
|
.lr.ph: ; preds = %0, %.lr.ph
|
||||||
|
%i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
|
||||||
|
%sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
|
||||||
|
%2 = getelementptr inbounds i32* %A, i32 %i.02
|
||||||
|
%3 = load i32* %2, align 4
|
||||||
|
%4 = add nsw i32 %3, %sum.01
|
||||||
|
%5 = add nsw i32 %i.02, 1
|
||||||
|
%exitcond = icmp eq i32 %5, %n
|
||||||
|
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
._crit_edge: ; preds = %.lr.ph, %0
|
||||||
|
%sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
|
||||||
|
ret i32 %sum.0.lcssa
|
||||||
|
}
|
@ -53,8 +53,6 @@ define void @example1() nounwind uwtable ssp {
|
|||||||
;UNROLL: @example10b
|
;UNROLL: @example10b
|
||||||
;UNROLL: load <4 x i16>
|
;UNROLL: load <4 x i16>
|
||||||
;UNROLL: load <4 x i16>
|
;UNROLL: load <4 x i16>
|
||||||
;UNROLL: load <4 x i16>
|
|
||||||
;UNROLL: store <4 x i32>
|
|
||||||
;UNROLL: store <4 x i32>
|
;UNROLL: store <4 x i32>
|
||||||
;UNROLL: store <4 x i32>
|
;UNROLL: store <4 x i32>
|
||||||
;UNROLL: ret void
|
;UNROLL: ret void
|
||||||
|
Loading…
x
Reference in New Issue
Block a user