From ab1f4ef9a2b7200c20a98fe9882656752f4452d5 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 14 Oct 2014 00:31:29 +0000 Subject: [PATCH] Add some optional passes around the vectorizer to both better prepare the IR going into it and to clean up the IR produced by the vectorizers. Note that these are *off by default* right now while folks collect data on whether the performance tradeoff is reasonable. In a build of the 'opt' binary, I see about 2% compile time regression due to this change on average. This is in my mind essentially the worst expected case: very little of the opt binary is going to *benefit* from these extra passes. I've seen several benchmarks improve in performance my small amounts due to running these passes, and there are certain (rare) cases where these passes make a huge difference by either enabling the vectorizer at all or by hoisting runtime checks out of the outer loop. My primary motivation is to prevent people from seeing runtime check overhead in benchmarks where the existing passes and optimizers would be able to eliminate that. I've chosen the sequence of passes based on the kinds of things that seem likely to be relevant for the code at each stage: rotaing loops for the vectorizer, finding correlated values, loop invariants, and unswitching opportunities from any runtime checks, and cleaning up commonalities exposed by the SLP vectorizer. I'll be pinging existing threads where some of these issues have come up and will start new threads to get folks to benchmark and collect data on whether this is the right tradeoff or we should do something else. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219644 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 33 ++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index d02e6975a19..877fc712ec7 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -48,6 +48,10 @@ UseGVNAfterVectorization("use-gvn-after-vectorization", cl::init(false), cl::Hidden, cl::desc("Run GVN instead of Early CSE after vectorization passes")); +static cl::opt ExtraVectorizerPasses( + "extra-vectorizer-passes", cl::init(false), cl::Hidden, + cl::desc("Run cleanup optimization passes after vectorization.")); + static cl::opt UseNewSROA("use-new-sroa", cl::init(true), cl::Hidden, cl::desc("Enable the new, experimental SROA pass")); @@ -283,6 +287,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + + // Re-rotate loops in all our loop nests. These may have fallout out of + // rotated form due to GVN or other transformations, and the vectorizer relies + // on the rotated form. + if (ExtraVectorizerPasses) + MPM.add(createLoopRotatePass()); + MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); // FIXME: Because of #pragma vectorize enable, the passes below are always // inserted in the pipeline, even when the vectorizer doesn't run (ex. when @@ -290,10 +301,29 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // as function calls, so that we can only pass them when the vectorizer // changed the code. MPM.add(createInstructionCombiningPass()); + if (OptLevel > 1 && ExtraVectorizerPasses) { + // At higher optimization levels, try to clean up any runtime overlap and + // alignment checks inserted by the vectorizer. We want to track correllated + // runtime checks for two inner loops in the same outer loop, fold any + // common computations, hoist loop-invariant aspects out of any outer loop, + // and unswitch the runtime checks if possible. Once hoisted, we may have + // dead (or speculatable) control flows or more combining opportunities. + MPM.add(createEarlyCSEPass()); + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createInstructionCombiningPass()); + MPM.add(createLICMPass()); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createCFGSimplificationPass()); + MPM.add(createInstructionCombiningPass()); + } if (RunSLPAfterLoopVectorization) { - if (SLPVectorize) + if (SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (OptLevel > 1 && ExtraVectorizerPasses) { + MPM.add(createEarlyCSEPass()); + } + } if (BBVectorize) { MPM.add(createBBVectorizePass()); @@ -312,6 +342,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); + MPM.add(createInstructionCombiningPass()); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops