diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index 29ad36f9616..75631b31e19 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -105,6 +105,7 @@ public: bool BBVectorize; bool SLPVectorize; bool LoopVectorize; + bool LateVectorize; private: /// ExtensionList - This is list of all of the extensions that are registered. diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9d88608a41c..1917cc81dda 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -32,6 +32,11 @@ static cl::opt<bool> RunLoopVectorization("vectorize-loops", cl::desc("Run the Loop vectorization passes")); +static cl::opt<bool> +LateVectorization("late-vectorize", cl::init(false), cl::Hidden, + cl::desc("Run the vectorization passes late in the pass " + "pipeline (after the inliner)")); + static cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::desc("Run the SLP vectorization passes")); @@ -59,6 +64,7 @@ PassManagerBuilder::PassManagerBuilder() { BBVectorize = RunBBVectorization; SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; + LateVectorize = LateVectorization; } PassManagerBuilder::~PassManagerBuilder() { @@ -189,8 +195,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 
MPM.add(createLoopDeletionPass()); // Delete dead loops - if (LoopVectorize && OptLevel > 1 && SizeLevel < 2) - MPM.add(createLoopVectorizePass()); + if (!LateVectorize && LoopVectorize && OptLevel > 1 && SizeLevel < 2) + MPM.add(createLoopVectorizePass()); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops @@ -210,26 +216,70 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - if (SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (!LateVectorize) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(createGVNPass()); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass()); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); + } } MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. + // As an experimental mode, run any vectorization passes in a separate + // pipeline from the CGSCC pass manager that runs iteratively with the + // inliner. + if (LateVectorize) { + // FIXME: This is a HACK! 
The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + MPM.add(createBarrierNoopPass()); + + // Add the various vectorization passes and relevant cleanup passes for + // them since we are no longer in the middle of the main scalar pipeline. + if (LoopVectorize && OptLevel > 1 && SizeLevel < 2) { + MPM.add(createLoopVectorizePass()); + + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); // Unroll small loops + + // FIXME: Is this necessary/useful? Should we also do SimplifyCFG? + MPM.add(createInstructionCombiningPass()); + } + + if (SLPVectorize) { + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + // FIXME: Is this necessary/useful? Should we also do SimplifyCFG? + MPM.add(createInstructionCombiningPass()); + } + + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass()); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); + } + } + if (!DisableUnitAtATime) { // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes