Cost Model: Move the 'max unroll factor' variable to the TTI and add initial Cost Model support on ARM.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171928 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2013-01-09 01:15:42 +00:00
parent d700a2f9c5
commit 83be7b0dd3
9 changed files with 89 additions and 7 deletions

View File

@ -148,6 +148,11 @@ public:
/// set to false, it returns the number of scalar registers.
virtual unsigned getNumberOfRegisters(bool Vector) const;
/// \return The maximum unroll factor that the vectorizer should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
virtual unsigned getMaximumUnrollFactor() const;
/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;

View File

@ -92,6 +92,10 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
return PrevTTI->getNumberOfRegisters(Vector);
}
unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
return PrevTTI->getMaximumUnrollFactor();
}
unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
Type *Ty) const {
return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
@ -216,6 +220,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return 8;
}
unsigned getMaximumUnrollFactor() const {
return 1;
}
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
return 1;
}

View File

@ -83,6 +83,7 @@ public:
/// @{
virtual unsigned getNumberOfRegisters(bool Vector) const;
virtual unsigned getMaximumUnrollFactor() const;
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const;
@ -182,6 +183,10 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
return 1;
}
unsigned BasicTTI::getMaximumUnrollFactor() const {
return 1;
}
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
// Check if any of the operands are vector operands.
int ISD = TLI->InstructionOpcodeToISD(Opcode);

View File

@ -77,6 +77,31 @@ public:
virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
/// @}
/// \name Vector TTI Implementations
/// @{
unsigned getNumberOfRegisters(bool Vector) const {
if (Vector) {
if (ST->hasNEON())
return 16;
return 0;
}
if (ST->isThumb1Only())
return 8;
return 16;
}
unsigned getMaximumUnrollFactor() const {
// These are out of order CPUs:
if (ST->isCortexA15() || ST->isSwift())
return 2;
return 1;
}
/// @}
};
} // end anonymous namespace

View File

@ -75,7 +75,6 @@ public:
/// \name Scalar TTI Implementations
/// @{
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
/// @}
@ -84,6 +83,7 @@ public:
/// @{
virtual unsigned getNumberOfRegisters(bool Vector) const;
virtual unsigned getMaximumUnrollFactor() const;
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const;
@ -156,7 +156,6 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
return -1;
}
X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
// TODO: Currently the __builtin_popcount() implementation using SSE3
@ -171,6 +170,18 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
return 8;
}
unsigned X86TTI::getMaximumUnrollFactor() const {
if (ST->isAtom())
return 1;
// Sandybridge and Haswell have multiple execution ports and pipelined
// vector units.
if (ST->hasAVX())
return 4;
return 2;
}
unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

View File

@ -116,9 +116,6 @@ static const unsigned RuntimeMemoryCheckThreshold = 4;
/// This is the highest vector width that we try to generate.
static const unsigned MaxVectorSize = 8;
/// This is the highest Unroll Factor.
static const unsigned MaxUnrollSize = 4;
namespace {
// Forward declarations.
@ -2715,6 +2712,8 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions));
// Clamp the unroll factor ranges to reasonable factors.
unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
if (UF > MaxUnrollSize)
UF = MaxUnrollSize;
else if (UF < 1)

View File

@ -0,0 +1,6 @@
config.suffixes = ['.ll', '.c', '.cpp']
targets = set(config.root.targets_to_build.split())
if not 'ARM' in targets:
config.unsupported = True

View File

@ -0,0 +1,25 @@
; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios3.0.0"
; Make sure that we are not crashing on ARM.
define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0, %.lr.ph
%i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
%sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
%2 = getelementptr inbounds i32* %A, i32 %i.02
%3 = load i32* %2, align 4
%4 = add nsw i32 %3, %sum.01
%5 = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %5, %n
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph, %0
%sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
ret i32 %sum.0.lcssa
}

View File

@ -53,8 +53,6 @@ define void @example1() nounwind uwtable ssp {
;UNROLL: @example10b
;UNROLL: load <4 x i16>
;UNROLL: load <4 x i16>
;UNROLL: load <4 x i16>
;UNROLL: store <4 x i32>
;UNROLL: store <4 x i32>
;UNROLL: store <4 x i32>
;UNROLL: ret void