LoopVectorizer: When -Os is used, vectorize only loops that dont require a tail loop. There is no testcase because I dont know of a way to initialize the loop vectorizer pass without adding an additional hidden flag.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169950 91177308-0d34-0410-b5e6-96231b3b80d8
2025-10-25 10:27:04 +00:00 · 2012-12-12 01:11:46 +00:00
parent 61f4dfe369
commit 5da804150d
6 changed files with 69 additions and 30 deletions
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -156,7 +156,7 @@ namespace {
      (void) llvm::createCorrelatedValuePropagationPass();
      (void) llvm::createMemDepPrinter();
      (void) llvm::createInstructionSimplifierPass();
-      (void) llvm::createLoopVectorizePass();
+      (void) llvm::createLoopVectorizePass(0);
      (void) llvm::createBBVectorizePass();

      (void)new llvm::IntervalPartition();
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -111,7 +111,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
 //
 // LoopVectorize - Create a loop vectorization pass.
 //
-Pass *createLoopVectorizePass();
+Pass *createLoopVectorizePass(bool OptForSize);

 //===----------------------------------------------------------------------===//
 /// @brief Vectorize the BasicBlock.
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -189,7 +189,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
  MPM.add(createLoopDeletionPass());          // Delete dead loops

  if (LoopVectorize && OptLevel > 1)
-    MPM.add(createLoopVectorizePass());
+    MPM.add(createLoopVectorizePass(SizeLevel));

  if (!DisableUnrollLoops)
    MPM.add(createLoopUnrollPass());          // Unroll small loops
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -51,9 +51,12 @@ namespace {

 /// The LoopVectorize Pass.
 struct LoopVectorize : public LoopPass {
-  static char ID; // Pass identification, replacement for typeid
+  /// Pass identification, replacement for typeid
+  static char ID;
+  /// Optimize for size. Do not generate tail loops.
+  bool OptForSize;

-  LoopVectorize() : LoopPass(ID) {
+  explicit LoopVectorize(bool OptSz = false) : LoopPass(ID), OptForSize(OptSz) {
    initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
  }

@@ -85,23 +88,17 @@ struct LoopVectorize : public LoopPass {
    }

    // Select the preffered vectorization factor.
-    unsigned VF = 1;
-    if (VectorizationFactor == 0) {
-      const VectorTargetTransformInfo *VTTI = 0;
-      if (TTI)
-        VTTI = TTI->getVectorTargetTransformInfo();
-      // Use the cost model.
-      LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
-      VF = CM.findBestVectorizationFactor();
+    const VectorTargetTransformInfo *VTTI = 0;
+    if (TTI)
+      VTTI = TTI->getVectorTargetTransformInfo();
+    // Use the cost model.
+    LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+    unsigned VF = CM.selectVectorizationFactor(OptForSize,
+                                                 VectorizationFactor);

-      if (VF == 1) {
-        DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
-        return false;
-      }
-
-    } else {
-      // Use the user command flag.
-      VF = VectorizationFactor;
+    if (VF == 1) {
+      DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+      return false;
    }

    DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
@@ -1886,7 +1883,48 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
 }

 unsigned
-LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
+                                                        unsigned UserVF) {
+  if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+    DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+    return 1;
+  }
+
+  // Find the trip count.
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+  DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+
+  unsigned VF = MaxVectorSize;
+
+  // If we optimize the program for size, avoid creating the tail loop.
+  if (OptForSize) {
+    // If we are unable to calculate the trip count then don't try to vectorize.
+    if (TC < 2) {
+      DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+      return 1;
+    }
+
+    // Find the maximum SIMD width that can fit within the trip count.
+    VF = TC % MaxVectorSize;
+
+    if (VF == 0)
+      VF = MaxVectorSize;
+
+    // If the trip count that we found modulo the vectorization factor is not
+    // zero then we require a tail.
+    if (VF < 2) {
+      DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+      return 1;
+    }
+  }
+
+  if (UserVF != 0) {
+    assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+    DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+
+    return UserVF;
+  }
+
  if (!VTTI) {
    DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
    return 1;
@@ -2121,8 +2159,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)

 namespace llvm {
-  Pass *createLoopVectorizePass() {
-    return new LoopVectorize();
+  Pass *createLoopVectorizePass(bool OptForSize = false) {
+    return new LoopVectorize(OptForSize);
  }
 }

--- a/lib/Transforms/Vectorize/LoopVectorize.h
+++ b/lib/Transforms/Vectorize/LoopVectorize.h
@@ -420,10 +420,11 @@ public:
                             const VectorTargetTransformInfo *Vtti):
  TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }

-  /// Returns the most profitable vectorization factor for the loop that is
-  /// smaller or equal to the VF argument. This method checks every power
-  /// of two up to VF.
-  unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize);
+  /// Returns the most profitable vectorization factor in powers of two.
+  /// This method checks every power of two up to VF. If UserVF is not ZERO
+  /// then this vectorization factor will be selected if vectorization is
+  /// possible.
+  unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF);

 private:
  /// Returns the expected execution cost. The unit of the cost does
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
-//===-- Vectorize.cpp -----------------------------------------------------===//
+   //===-- Vectorize.cpp -----------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -39,5 +39,5 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
 }

 void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLoopVectorizePass());
+  unwrap(PM)->add(createLoopVectorizePass(0));
 }