diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 78674952847..c72c2dc41fc 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -172,6 +172,12 @@ static cl::opt<unsigned> SmallLoopCost(
     "small-loop-cost", cl::init(20), cl::Hidden,
     cl::desc("The cost of a loop that is considered 'small' by the unroller."));
 
+static cl::opt<bool> LoopVectorizeWithBlockFrequency(
+    "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden,
+    cl::desc("Enable the use of the block frequency analysis to access PGO "
+             "heuristics minimizing code growth in cold regions and being more "
+             "aggressive in hot regions."));
+
 // Runtime unroll loops for load/store throughput.
 static cl::opt<bool> EnableLoadStoreRuntimeUnroll(
     "enable-loadstore-runtime-unroll", cl::init(false), cl::Hidden,
@@ -1099,9 +1105,13 @@ struct LoopVectorize : public FunctionPass {
     // Compute the weighted frequency of this loop being executed and see if it
     // is less than 20% of the function entry baseline frequency. Note that we
     // always have a canonical loop here because we think we *can* vectoriez.
-    BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
-    if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
-      OptForSize = true;
+    // FIXME: This is hidden behind a flag due to pervasive problems with
+    // exactly what block frequency models.
+    if (LoopVectorizeWithBlockFrequency) {
+      BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
+      if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
+        OptForSize = true;
+    }
 
     // Check the function attributes to see if implicit floats are allowed.a
     // FIXME: This check doesn't seem possibly correct -- what if the loop is
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index 1d46366369c..dfa4faaf09e 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"