From ae3b652f5cc19d83b6466d4fa70a7d1c7fb6d06c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 12 Dec 2012 19:29:45 +0000 Subject: [PATCH] LoopVectorizer: Use the "optsize" attribute to decide if we are allowed to increase the function size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170004 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/LinkAllPasses.h | 2 +- include/llvm/Transforms/Vectorize.h | 2 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 17 +- lib/Transforms/Vectorize/Vectorize.cpp | 2 +- test/Transforms/LoopVectorize/small-size.ll | 170 ++++++++++++++++++++ 6 files changed, 185 insertions(+), 10 deletions(-) create mode 100644 test/Transforms/LoopVectorize/small-size.ll diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 48d7c40642d..baf8550edc8 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -156,7 +156,7 @@ namespace { (void) llvm::createCorrelatedValuePropagationPass(); (void) llvm::createMemDepPrinter(); (void) llvm::createInstructionSimplifierPass(); - (void) llvm::createLoopVectorizePass(0); + (void) llvm::createLoopVectorizePass(); (void) llvm::createBBVectorizePass(); (void)new llvm::IntervalPartition(); diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h index 81864f32eff..1ba4d22d5f5 100644 --- a/include/llvm/Transforms/Vectorize.h +++ b/include/llvm/Transforms/Vectorize.h @@ -111,7 +111,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig()); // // LoopVectorize - Create a loop vectorization pass. // -Pass *createLoopVectorizePass(bool OptForSize); +Pass *createLoopVectorizePass(); //===----------------------------------------------------------------------===// /// @brief Vectorize the BasicBlock. 
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 0862786127b..a9a9f2eeceb 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -189,7 +189,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopDeletionPass()); // Delete dead loops if (LoopVectorize && OptLevel > 1) - MPM.add(createLoopVectorizePass(SizeLevel)); + MPM.add(createLoopVectorizePass()); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index da073c5e599..749b664f53f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -53,10 +53,8 @@ namespace { struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid static char ID; - /// Optimize for size. Do not generate tail loops. - bool OptForSize; - explicit LoopVectorize(bool OptSz = false) : LoopPass(ID), OptForSize(OptSz) { + explicit LoopVectorize() : LoopPass(ID) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -93,8 +91,15 @@ struct LoopVectorize : public LoopPass { VTTI = TTI->getVectorTargetTransformInfo(); // Use the cost model. LoopVectorizationCostModel CM(L, SE, &LVL, VTTI); + + // Check the function attributes to find out if this function should be + // optimized for size. 
+ Function *F = L->getHeader()->getParent(); + bool OptForSize = + F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize); + unsigned VF = CM.selectVectorizationFactor(OptForSize, - VectorizationFactor); + VectorizationFactor); if (VF == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); @@ -2159,8 +2164,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { - Pass *createLoopVectorizePass(bool OptForSize = false) { - return new LoopVectorize(OptForSize); + Pass *createLoopVectorizePass() { + return new LoopVectorize(); } } diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index cf7d4ee8b2e..19eefd2f87e 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -39,5 +39,5 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) { } void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLoopVectorizePass(0)); + unwrap(PM)->add(createLoopVectorizePass()); } diff --git a/test/Transforms/LoopVectorize/small-size.ll b/test/Transforms/LoopVectorize/small-size.ll new file mode 100644 index 00000000000..deb0bb2f87d --- /dev/null +++ b/test/Transforms/LoopVectorize/small-size.ll @@ -0,0 +1,170 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 +@G = common global [32 x [1024 x i32]] zeroinitializer, align 16 +@ub = common global [1024 x i32] zeroinitializer, align 16 +@uc = common global [1024 x i32] zeroinitializer, align 16 
+@d = common global [2048 x i32] zeroinitializer, align 16 +@fa = common global [1024 x float] zeroinitializer, align 16 +@fb = common global [1024 x float] zeroinitializer, align 16 +@ic = common global [1024 x i32] zeroinitializer, align 16 +@da = common global [1024 x float] zeroinitializer, align 16 +@db = common global [1024 x float] zeroinitializer, align 16 +@dc = common global [1024 x float] zeroinitializer, align 16 +@dd = common global [1024 x float] zeroinitializer, align 16 +@dj = common global [1024 x i32] zeroinitializer, align 16 + +; We can optimize this test without a tail. +;CHECK: @example1 +;CHECK: load <4 x i32> +;CHECK: add nsw <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example1() optsize { + br label %1 + +;