From 842a06e8dd73ed9cd541c4e8c786c425af9bc461 Mon Sep 17 00:00:00 2001 From: Tyler Nowicki Date: Sat, 2 Aug 2014 00:14:03 +0000 Subject: [PATCH] Add diagnostics to the vectorizer cost model. When the cost model determines vectorization is not possible/profitable these remarks print an analysis of that decision. Note that in selectVectorizationFactor() we can assume that OptForSize and ForceVectorization are mutually exclusive. Reviewed by Arnold Schwaighofer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214599 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 46 ++++++++++----- .../LoopVectorize/conditional-assignment.ll | 58 +++++++++++++++++++ 2 files changed, 88 insertions(+), 16 deletions(-) create mode 100644 test/Transforms/LoopVectorize/conditional-assignment.ll diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9daee603f3b..9f6cfeca2c0 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -209,6 +209,7 @@ namespace { // Forward declarations. class LoopVectorizationLegality; class LoopVectorizationCostModel; +class LoopVectorizeHints; /// Optimization analysis message produced during vectorization. Messages inform /// the user why vectorization did not occur. @@ -877,8 +878,9 @@ public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, - const DataLayout *DL, const TargetLibraryInfo *TLI) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {} + const DataLayout *DL, const TargetLibraryInfo *TLI, + const Function *F, const LoopVectorizeHints *Hints) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), TheFunction(F), Hints(Hints) {} /// Information about vectorization costs struct VectorizationFactor { @@ -889,9 +891,7 @@ public: /// This method checks every power of two up to VF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is /// possible. - VectorizationFactor selectVectorizationFactor(bool OptForSize, - unsigned UserVF, - bool ForceVectorization); + VectorizationFactor selectVectorizationFactor(bool OptForSize); /// \return The size (in bits) of the widest type in the code that /// needs to be vectorized. We ignore values that remain scalar such as @@ -903,8 +903,7 @@ public: /// based on register pressure and other parameters. /// VF and LoopCost are the selected vectorization factor and the cost of the /// selected VF. - unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF, - unsigned LoopCost); + unsigned selectUnrollFactor(bool OptForSize, unsigned VF, unsigned LoopCost); /// \brief A struct that represents some properties of the register usage /// of a loop. @@ -940,6 +939,16 @@ private: /// as a vector operation. bool isConsecutiveLoadOrStore(Instruction *I); + /// Report an analysis message to assist the user in diagnosing loops that are + /// not vectorized. + void emitAnalysis(Report &Message) { + DebugLoc DL = TheLoop->getStartLoc(); + if (Instruction *I = Message.getInstr()) + DL = I->getDebugLoc(); + emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE, + *TheFunction, DL, Message.str()); + } + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -954,6 +963,9 @@ private: const DataLayout *DL; /// Target Library Info. const TargetLibraryInfo *TLI; + const Function *TheFunction; + // Loop Vectorize Hint. + const LoopVectorizeHints *Hints; }; /// Utility class for getting and setting loop vectorizer hints in the form @@ -1301,7 +1313,7 @@ struct LoopVectorize : public FunctionPass { } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, F, &Hints); // Check the function attributes to find out if this function should be // optimized for size. @@ -1336,13 +1348,11 @@ struct LoopVectorize : public FunctionPass { // Select the optimal vectorization factor. const LoopVectorizationCostModel::VectorizationFactor VF = - CM.selectVectorizationFactor(OptForSize, Hints.getWidth(), - Hints.getForce() == - LoopVectorizeHints::FK_Enabled); + CM.selectVectorizationFactor(OptForSize); // Select the unroll factor. const unsigned UF = - CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost); + CM.selectUnrollFactor(OptForSize, VF.Width, VF.Cost); DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); @@ -5263,17 +5273,17 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, } LoopVectorizationCostModel::VectorizationFactor -LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, - unsigned UserVF, - bool ForceVectorization) { +LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { // Width 1 means no vectorize VectorizationFactor Factor = { 1U, 0U }; if (OptForSize && Legal->getRuntimePointerCheck()->Need) { + emitAnalysis(Report() << "runtime pointer checks needed. Enable vectorization of this loop with '#pragma clang loop vectorize(enable)' when compiling with -Os"); DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); return Factor; } if (!EnableCondStoresVectorization && Legal->NumPredStores) { + emitAnalysis(Report() << "store that is conditionally executed prevents vectorization"); DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n"); return Factor; } @@ -5308,6 +5318,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, if (OptForSize) { // If we are unable to calculate the trip count then don't try to vectorize. if (TC < 2) { + emitAnalysis(Report() << "unable to calculate the loop count due to complex control flow"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); return Factor; } @@ -5321,11 +5332,13 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // If the trip count that we found modulo the vectorization factor is not // zero then we require a tail. if (VF < 2) { + emitAnalysis(Report() << "cannot optimize for size and vectorize at the same time. Enable vectorization of this loop with '#pragma clang loop vectorize(enable)' when compiling with -Os"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); return Factor; } } + int UserVF = Hints->getWidth(); if (UserVF != 0) { assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); @@ -5341,6 +5354,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, unsigned Width = 1; DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); + bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; // Ignore scalar width, because the user explicitly wants vectorization. if (ForceVectorization && VF > 1) { Width = 2; @@ -5410,7 +5424,6 @@ unsigned LoopVectorizationCostModel::getWidestType() { unsigned LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, - unsigned UserUF, unsigned VF, unsigned LoopCost) { @@ -5429,6 +5442,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, // to the increased register pressure. // Use the user preference, unless 'auto' is selected. + int UserUF = Hints->getUnroll(); if (UserUF != 0) return UserUF; diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll new file mode 100644 index 00000000000..f1c76bd2315 --- /dev/null +++ b/test/Transforms/LoopVectorize/conditional-assignment.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s + +; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Function Attrs: nounwind ssp uwtable +define void @conditional_store(i32* noalias nocapture %indices) #0 { +entry: + br label %for.body, !dbg !10 + +for.body: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32* %indices, i64 %indvars.iv, !dbg !12 + %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !14 + %cmp1 = icmp eq i32 %0, 1024, !dbg !12 + br i1 %cmp1, label %if.then, label %for.inc, !dbg !12 + +if.then: ; preds = %for.body + store i32 0, i32* %arrayidx, align 4, !dbg !18, !tbaa !14 + br label %for.inc, !dbg !18 + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10 + %exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !10 + br i1 %exitcond, label %for.end, label %for.body, !dbg !10 + +for.end: ; preds = %for.inc + ret void, !dbg !19 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.6.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!1 = metadata !{metadata !"source.c", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"conditional_store", metadata !"conditional_store", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*)* @conditional_store, null, null, metadata !2, i32 1} +!5 = metadata !{i32 786473, metadata !1} +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.6.0"} +!10 = metadata !{i32 2, i32 8, metadata !11, null} +!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 3, i32 0, i32 0} +!12 = metadata !{i32 3, i32 9, metadata !13, null} +!13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 9, i32 0, i32 1} +!14 = metadata !{metadata !15, metadata !15, i64 0} +!15 = metadata !{metadata !"int", metadata !16, i64 0} +!16 = metadata !{metadata !"omnipotent char", metadata !17, i64 0} +!17 = metadata !{metadata !"Simple C/C++ TBAA"} +!18 = metadata !{i32 3, i32 29, metadata !13, null} +!19 = metadata !{i32 4, i32 1, metadata !4, null}