Add diagnostics to the vectorizer cost model.

When the cost model determines that vectorization is not possible or not profitable, these remarks print an analysis of that decision.

Note that in selectVectorizationFactor() we can assume that OptForSize and ForceVectorization are mutually exclusive.

Reviewed by Arnold Schwaighofer


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214599 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tyler Nowicki 2014-08-02 00:14:03 +00:00
parent 1b23e26921
commit 842a06e8dd
2 changed files with 88 additions and 16 deletions

View File

@ -209,6 +209,7 @@ namespace {
// Forward declarations. // Forward declarations.
class LoopVectorizationLegality; class LoopVectorizationLegality;
class LoopVectorizationCostModel; class LoopVectorizationCostModel;
class LoopVectorizeHints;
/// Optimization analysis message produced during vectorization. Messages inform /// Optimization analysis message produced during vectorization. Messages inform
/// the user why vectorization did not occur. /// the user why vectorization did not occur.
@ -877,8 +878,9 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal, LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI, const TargetTransformInfo &TTI,
const DataLayout *DL, const TargetLibraryInfo *TLI) const DataLayout *DL, const TargetLibraryInfo *TLI,
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {} const Function *F, const LoopVectorizeHints *Hints)
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), TheFunction(F), Hints(Hints) {}
/// Information about vectorization costs /// Information about vectorization costs
struct VectorizationFactor { struct VectorizationFactor {
@ -889,9 +891,7 @@ public:
/// This method checks every power of two up to VF. If UserVF is not ZERO /// This method checks every power of two up to VF. If UserVF is not ZERO
/// then this vectorization factor will be selected if vectorization is /// then this vectorization factor will be selected if vectorization is
/// possible. /// possible.
VectorizationFactor selectVectorizationFactor(bool OptForSize, VectorizationFactor selectVectorizationFactor(bool OptForSize);
unsigned UserVF,
bool ForceVectorization);
/// \return The size (in bits) of the widest type in the code that /// \return The size (in bits) of the widest type in the code that
/// needs to be vectorized. We ignore values that remain scalar such as /// needs to be vectorized. We ignore values that remain scalar such as
@ -903,8 +903,7 @@ public:
/// based on register pressure and other parameters. /// based on register pressure and other parameters.
/// VF and LoopCost are the selected vectorization factor and the cost of the /// VF and LoopCost are the selected vectorization factor and the cost of the
/// selected VF. /// selected VF.
unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF, unsigned selectUnrollFactor(bool OptForSize, unsigned VF, unsigned LoopCost);
unsigned LoopCost);
/// \brief A struct that represents some properties of the register usage /// \brief A struct that represents some properties of the register usage
/// of a loop. /// of a loop.
@ -940,6 +939,16 @@ private:
/// as a vector operation. /// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I); bool isConsecutiveLoadOrStore(Instruction *I);
/// Emit \p Message as an optimization-analysis remark for the enclosing
/// function, to help the user diagnose why this loop was not vectorized.
///
/// The remark is attached to the debug location of the instruction recorded
/// in \p Message when there is one; otherwise the loop's start location is
/// used.
void emitAnalysis(Report &Message) {
  // Start from the loop's own location. The local is deliberately not named
  // DL so it cannot be confused with the DataLayout member of that name.
  DebugLoc RemarkLoc = TheLoop->getStartLoc();
  Instruction *Inst = Message.getInstr();
  if (Inst)
    RemarkLoc = Inst->getDebugLoc();
  emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
                                 *TheFunction, RemarkLoc, Message.str());
}
/// The loop that we evaluate. /// The loop that we evaluate.
Loop *TheLoop; Loop *TheLoop;
/// Scev analysis. /// Scev analysis.
@ -954,6 +963,9 @@ private:
const DataLayout *DL; const DataLayout *DL;
/// Target Library Info. /// Target Library Info.
const TargetLibraryInfo *TLI; const TargetLibraryInfo *TLI;
const Function *TheFunction;
// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
}; };
/// Utility class for getting and setting loop vectorizer hints in the form /// Utility class for getting and setting loop vectorizer hints in the form
@ -1301,7 +1313,7 @@ struct LoopVectorize : public FunctionPass {
} }
// Use the cost model. // Use the cost model.
LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, F, &Hints);
// Check the function attributes to find out if this function should be // Check the function attributes to find out if this function should be
// optimized for size. // optimized for size.
@ -1336,13 +1348,11 @@ struct LoopVectorize : public FunctionPass {
// Select the optimal vectorization factor. // Select the optimal vectorization factor.
const LoopVectorizationCostModel::VectorizationFactor VF = const LoopVectorizationCostModel::VectorizationFactor VF =
CM.selectVectorizationFactor(OptForSize, Hints.getWidth(), CM.selectVectorizationFactor(OptForSize);
Hints.getForce() ==
LoopVectorizeHints::FK_Enabled);
// Select the unroll factor. // Select the unroll factor.
const unsigned UF = const unsigned UF =
CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost); CM.selectUnrollFactor(OptForSize, VF.Width, VF.Cost);
DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
<< DebugLocStr << '\n'); << DebugLocStr << '\n');
@ -5263,17 +5273,17 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
} }
LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
unsigned UserVF,
bool ForceVectorization) {
// Width 1 means no vectorize // Width 1 means no vectorize
VectorizationFactor Factor = { 1U, 0U }; VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) { if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
emitAnalysis(Report() << "runtime pointer checks needed. Enable vectorization of this loop with '#pragma clang loop vectorize(enable)' when compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
return Factor; return Factor;
} }
if (!EnableCondStoresVectorization && Legal->NumPredStores) { if (!EnableCondStoresVectorization && Legal->NumPredStores) {
emitAnalysis(Report() << "store that is conditionally executed prevents vectorization");
DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n"); DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
return Factor; return Factor;
} }
@ -5308,6 +5318,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
if (OptForSize) { if (OptForSize) {
// If we are unable to calculate the trip count then don't try to vectorize. // If we are unable to calculate the trip count then don't try to vectorize.
if (TC < 2) { if (TC < 2) {
emitAnalysis(Report() << "unable to calculate the loop count due to complex control flow");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor; return Factor;
} }
@ -5321,11 +5332,13 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
// If the trip count that we found modulo the vectorization factor is not // If the trip count that we found modulo the vectorization factor is not
// zero then we require a tail. // zero then we require a tail.
if (VF < 2) { if (VF < 2) {
emitAnalysis(Report() << "cannot optimize for size and vectorize at the same time. Enable vectorization of this loop with '#pragma clang loop vectorize(enable)' when compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor; return Factor;
} }
} }
int UserVF = Hints->getWidth();
if (UserVF != 0) { if (UserVF != 0) {
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
@ -5341,6 +5354,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
unsigned Width = 1; unsigned Width = 1;
DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
// Ignore scalar width, because the user explicitly wants vectorization. // Ignore scalar width, because the user explicitly wants vectorization.
if (ForceVectorization && VF > 1) { if (ForceVectorization && VF > 1) {
Width = 2; Width = 2;
@ -5410,7 +5424,6 @@ unsigned LoopVectorizationCostModel::getWidestType() {
unsigned unsigned
LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
unsigned UserUF,
unsigned VF, unsigned VF,
unsigned LoopCost) { unsigned LoopCost) {
@ -5429,6 +5442,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// to the increased register pressure. // to the increased register pressure.
// Use the user preference, unless 'auto' is selected. // Use the user preference, unless 'auto' is selected.
int UserUF = Hints->getUnroll();
if (UserUF != 0) if (UserUF != 0)
return UserUF; return UserUF;

View File

@ -0,0 +1,58 @@
; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
; Test input: a counted loop whose only store is guarded by a branch, so the
; store runs on some iterations only.
; Each iteration loads indices[i] and writes 0 back to the same slot when the
; loaded value equals 1024. The induction variable starts at 0 and the loop
; exits once its incremented value reaches 4096.
define void @conditional_store(i32* noalias nocapture %indices) #0 {
entry:
; !dbg !10 is line 2, column 8, matching the location in the expected remark.
br label %for.body, !dbg !10
; Loop header: load the current element and compare it against 1024.
for.body: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
%arrayidx = getelementptr inbounds i32* %indices, i64 %indvars.iv, !dbg !12
%0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !14
%cmp1 = icmp eq i32 %0, 1024, !dbg !12
br i1 %cmp1, label %if.then, label %for.inc, !dbg !12
; Guarded block: reached only when the comparison above is true. This is the
; conditionally executed store.
if.then: ; preds = %for.body
store i32 0, i32* %arrayidx, align 4, !dbg !18, !tbaa !14
br label %for.inc, !dbg !18
; Latch: advance the induction variable and leave the loop after 4096
; iterations.
for.inc: ; preds = %for.body, %if.then
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
%exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !10
br i1 %exitcond, label %for.end, label %for.body, !dbg !10
for.end: ; preds = %for.inc
ret void, !dbg !19
}
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.6.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
!1 = metadata !{metadata !"source.c", metadata !"."}
!2 = metadata !{}
!3 = metadata !{metadata !4}
!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"conditional_store", metadata !"conditional_store", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*)* @conditional_store, null, null, metadata !2, i32 1}
!5 = metadata !{i32 786473, metadata !1}
!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
!9 = metadata !{metadata !"clang version 3.6.0"}
!10 = metadata !{i32 2, i32 8, metadata !11, null}
!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 3, i32 0, i32 0}
!12 = metadata !{i32 3, i32 9, metadata !13, null}
!13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 9, i32 0, i32 1}
!14 = metadata !{metadata !15, metadata !15, i64 0}
!15 = metadata !{metadata !"int", metadata !16, i64 0}
!16 = metadata !{metadata !"omnipotent char", metadata !17, i64 0}
!17 = metadata !{metadata !"Simple C/C++ TBAA"}
!18 = metadata !{i32 3, i32 29, metadata !13, null}
!19 = metadata !{i32 4, i32 1, metadata !4, null}