mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
Cost Model: Move the 'max unroll factor' variable to the TTI and add initial Cost Model support on ARM.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171928 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d700a2f9c5
commit
83be7b0dd3
@ -148,6 +148,11 @@ public:
|
||||
/// set to false, it returns the number of scalar registers.
|
||||
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
||||
|
||||
/// \return The maximum unroll factor that the vectorizer should try to
|
||||
/// perform for this target. This number depends on the level of parallelism
|
||||
/// and the number of execution units in the CPU.
|
||||
virtual unsigned getMaximumUnrollFactor() const;
|
||||
|
||||
/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
|
||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
||||
|
||||
|
@ -92,6 +92,10 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
|
||||
return PrevTTI->getNumberOfRegisters(Vector);
|
||||
}
|
||||
|
||||
/// Forwards the query to PrevTTI and returns its result unchanged; this
/// implementation adds no limit of its own.
unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
|
||||
return PrevTTI->getMaximumUnrollFactor();
|
||||
}
|
||||
|
||||
unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
|
||||
Type *Ty) const {
|
||||
return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
|
||||
@ -216,6 +220,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
|
||||
return 8;
|
||||
}
|
||||
|
||||
/// \return Always 1.
// NOTE(review): the caller shown in this commit clamps UF to this value, so 1
// presumably means "do not unroll" -- confirm against the full vectorizer code.
unsigned getMaximumUnrollFactor() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// \return A flat cost of 1; neither \p Opcode nor \p Ty is inspected.
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||
return 1;
|
||||
}
|
||||
|
@ -83,6 +83,7 @@ public:
|
||||
/// @{
|
||||
|
||||
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
||||
virtual unsigned getMaximumUnrollFactor() const;
|
||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
||||
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
|
||||
int Index, Type *SubTp) const;
|
||||
@ -182,6 +183,10 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// \return Always 1. No subtarget information is consulted here; the
/// target-specific implementations in this commit return larger values.
unsigned BasicTTI::getMaximumUnrollFactor() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||
// Check if any of the operands are vector operands.
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
|
@ -77,6 +77,31 @@ public:
|
||||
virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
/// \name Vector TTI Implementations
|
||||
/// @{
|
||||
|
||||
/// Reports the register count for the current subtarget (ST).
///
/// \param Vector  true to query vector registers, false to query scalar
///                registers.
/// \return For vector queries: 16 when the subtarget has NEON, otherwise 0.
///         For scalar queries: 8 on Thumb1-only subtargets, otherwise 16.
unsigned getNumberOfRegisters(bool Vector) const {
|
||||
if (Vector) {
|
||||
if (ST->hasNEON())
|
||||
return 16;
|
||||
// No NEON: report zero vector registers.
return 0;
|
||||
}
|
||||
|
||||
// Scalar query from here on.
if (ST->isThumb1Only())
|
||||
return 8;
|
||||
return 16;
|
||||
}
|
||||
|
||||
/// \return 2 when the subtarget is a Cortex-A15 or Swift, otherwise 1.
unsigned getMaximumUnrollFactor() const {
|
||||
// These are out-of-order CPUs:
|
||||
if (ST->isCortexA15() || ST->isSwift())
|
||||
return 2;
|
||||
// Every other subtarget gets a factor of 1.
return 1;
|
||||
}
|
||||
|
||||
/// @}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
@ -75,7 +75,6 @@ public:
|
||||
|
||||
/// \name Scalar TTI Implementations
|
||||
/// @{
|
||||
|
||||
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
|
||||
|
||||
/// @}
|
||||
@ -84,6 +83,7 @@ public:
|
||||
/// @{
|
||||
|
||||
virtual unsigned getNumberOfRegisters(bool Vector) const;
|
||||
virtual unsigned getMaximumUnrollFactor() const;
|
||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
||||
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
|
||||
int Index, Type *SubTp) const;
|
||||
@ -156,7 +156,6 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
|
||||
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
|
||||
// TODO: Currently the __builtin_popcount() implementation using SSE3
|
||||
@ -171,6 +170,18 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
|
||||
return 8;
|
||||
}
|
||||
|
||||
/// \return 1 on Atom subtargets, 4 on subtargets with AVX, and 2 otherwise.
/// The Atom check comes first, so it wins regardless of the AVX check.
unsigned X86TTI::getMaximumUnrollFactor() const {
|
||||
if (ST->isAtom())
|
||||
return 1;
|
||||
|
||||
// Sandybridge and Haswell have multiple execution ports and pipelined
|
||||
// vector units.
|
||||
// NOTE(review): hasAVX() is used as the proxy for those cores -- confirm it
// does not also match CPUs where a factor of 4 is undesirable.
if (ST->hasAVX())
|
||||
return 4;
|
||||
|
||||
// Default for the remaining subtargets.
return 2;
|
||||
}
|
||||
|
||||
unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||
// Legalize the type.
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
|
||||
|
@ -116,9 +116,6 @@ static const unsigned RuntimeMemoryCheckThreshold = 4;
|
||||
/// This is the highest vector width that we try to generate.
|
||||
static const unsigned MaxVectorSize = 8;
|
||||
|
||||
/// This is the highest Unroll Factor.
|
||||
static const unsigned MaxUnrollSize = 4;
|
||||
|
||||
namespace {
|
||||
|
||||
// Forward declarations.
|
||||
@ -2715,6 +2712,8 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
|
||||
UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions));
|
||||
|
||||
// Clamp the unroll factor ranges to reasonable factors.
|
||||
unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
|
||||
|
||||
if (UF > MaxUnrollSize)
|
||||
UF = MaxUnrollSize;
|
||||
else if (UF < 1)
|
||||
|
6
test/Transforms/LoopVectorize/ARM/lit.local.cfg
Normal file
6
test/Transforms/LoopVectorize/ARM/lit.local.cfg
Normal file
@ -0,0 +1,6 @@
|
||||
# Local lit configuration for this test directory.
# File suffixes that lit treats as tests here.
config.suffixes = ['.ll', '.c', '.cpp']
|
||||
|
||||
# Targets come from the whitespace-separated targets_to_build string on the
# root config.
targets = set(config.root.targets_to_build.split())
|
||||
# Mark the directory unsupported when ARM is not among those targets.
if not 'ARM' in targets:
|
||||
config.unsupported = True
|
||||
|
25
test/Transforms/LoopVectorize/ARM/sanity.ll
Normal file
25
test/Transforms/LoopVectorize/ARM/sanity.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
|
||||
target triple = "thumbv7-apple-ios3.0.0"
|
||||
|
||||
; Make sure that we are not crashing on ARM.
|
||||
|
||||
; @foo returns the sum of the i32 elements A[0] .. A[n-1]. When n <= 0 the loop
; is bypassed and 0 is returned.
define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
|
||||
%1 = icmp sgt i32 %n, 0
|
||||
br i1 %1, label %.lr.ph, label %._crit_edge
|
||||
|
||||
; Loop body: %i.02 is the induction variable, %sum.01 the running total.
.lr.ph: ; preds = %0, %.lr.ph
|
||||
%i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
|
||||
%sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
|
||||
%2 = getelementptr inbounds i32* %A, i32 %i.02
|
||||
%3 = load i32* %2, align 4
|
||||
%4 = add nsw i32 %3, %sum.01
|
||||
%5 = add nsw i32 %i.02, 1
|
||||
; Exit once the incremented index equals %n.
%exitcond = icmp eq i32 %5, %n
|
||||
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||
|
||||
; Merge point: 0 when the loop was skipped, otherwise the final sum.
._crit_edge: ; preds = %.lr.ph, %0
|
||||
%sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
|
||||
ret i32 %sum.0.lcssa
|
||||
}
|
@ -53,8 +53,6 @@ define void @example1() nounwind uwtable ssp {
|
||||
;UNROLL: @example10b
|
||||
;UNROLL: load <4 x i16>
|
||||
;UNROLL: load <4 x i16>
|
||||
;UNROLL: load <4 x i16>
|
||||
;UNROLL: store <4 x i32>
|
||||
;UNROLL: store <4 x i32>
|
||||
;UNROLL: store <4 x i32>
|
||||
;UNROLL: ret void
|
||||
|
Loading…
Reference in New Issue
Block a user