PR14448 - prevent the loop vectorizer from vectorizing the same loop twice.

The LoopVectorizer often runs multiple times on the same function due to inlining.
When this happens the loop vectorizer often vectorizes the same loops multiple times, increasing code size and adding unneeded branches.
With this patch, the vectorizer attaches metadata to the scalar loop when it vectorizes it, marking the loop as 'already vectorized' so that the vectorizer ignores it if it encounters it a second time.

PR14448.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176399 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2013-03-02 01:33:49 +00:00
parent 328d1b6500
commit 5290baacb8
2 changed files with 93 additions and 0 deletions

View File

@ -116,6 +116,12 @@ static const unsigned TinyTripCountUnrollThreshold = 128;
/// number of pointers. Notice that the check is quadratic!
static const unsigned RuntimeMemoryCheckThreshold = 4;
/// Name of the metadata kind used to mark a scalar loop that has already been
/// vectorized. The vectorizer attaches metadata with this name to a terminator
/// of the original scalar loop, and the legality check refuses to vectorize a
/// loop whose header terminator carries it, so the same loop is not
/// vectorized again on a later run of the pass.
/// Both the characters and the pointer are const: the name is a fixed
/// constant and must never be re-pointed at run time.
static const char *const
AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
namespace {
// Forward declarations.
@ -1159,6 +1165,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
// Mark the old scalar loop with metadata that tells us not to vectorize this
// loop again if we run into it.
MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
// Some loops have a single integer induction variable, while other loops
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
@ -2224,6 +2235,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
// If we marked the scalar loop as "already vectorized" then no need
// to vectorize it again.
if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
DEBUG(dbgs() << "LV: This loop was vectorized before\n");
return false;
}
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {

View File

@ -0,0 +1,75 @@
; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
;
; We want to make sure that we are vectorizing the scalar loop only once
; even if the pass manager runs the vectorizer multiple times due to inlining.
; This test checks that we add metadata to vectorized loops
; CHECK: _Z4foo1Pii
; CHECK: <4 x i32>
; CHECK: llvm.vectorizer.already_vectorized
; CHECK: ret
; This test comes from the loop:
;
;int foo (int *A, int n) {
; return std::accumulate(A, A + n, 0);
;}
; Sums the i32 elements in [%A, %A + %n) with a scalar reduction loop that is
; driven by a pointer induction variable. Returns 0 without entering the loop
; when %n == 0.
define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 {
entry:
; %add.ptr is the end pointer (%A advanced by the sign-extended %n) that the
; loop exit condition compares against.
%idx.ext = sext i32 %n to i64
%add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
; Skip the loop entirely for a zero element count.
%cmp3.i = icmp eq i32 %n, 0
br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
for.body.i: ; preds = %entry, %for.body.i
; Running sum (starts at 0) and current element pointer (starts at %A).
%__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
%__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
%0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
%add.i = add nsw i32 %0, %__init.addr.05.i
; Advance by one element and leave the loop once the end pointer is reached.
%incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
%cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
; This branch carries no "already vectorized" metadata in the input; the
; FileCheck directives for this function expect the metadata name to appear
; in the output.
br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry
; Result is 0 when the loop was skipped, otherwise the final sum.
%__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
ret i32 %__init.addr.0.lcssa.i
}
; This test checks that we don't vectorize loops that are marked with the "already vectorized" metadata.
; CHECK: _Z4foo2Pii
; CHECK-NOT: <4 x i32>
; CHECK: llvm.vectorizer.already_vectorized
; CHECK: ret
; Same summation loop as @_Z4foo1Pii, except that the loop's terminating branch
; already carries !llvm.vectorizer.already_vectorized in the input. Returns 0
; without entering the loop when %n == 0.
define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 {
entry:
; %add.ptr is the end pointer (%A advanced by the sign-extended %n) that the
; loop exit condition compares against.
%idx.ext = sext i32 %n to i64
%add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
; Skip the loop entirely for a zero element count.
%cmp3.i = icmp eq i32 %n, 0
br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
for.body.i: ; preds = %entry, %for.body.i
; Running sum (starts at 0) and current element pointer (starts at %A).
%__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
%__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
%0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
%add.i = add nsw i32 %0, %__init.addr.05.i
; Advance by one element and leave the loop once the end pointer is reached.
%incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
%cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
; The metadata attached here (empty node !3) is the marker under test: it
; tells the vectorizer this loop has been handled before.
br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.vectorizer.already_vectorized !3
_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry
; Result is 0 when the loop was skipped, otherwise the final sum.
%__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
ret i32 %__init.addr.0.lcssa.i
}
; Attribute group #0 is applied to both test functions.
attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
; !0 - !2: TBAA descriptor chain ("int" -> "omnipotent char" -> root) referenced
; by the loads in both functions via !tbaa !0.
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
; !3: empty node used as the operand of !llvm.vectorizer.already_vectorized on
; the loop branch in @_Z4foo2Pii.
!3 = metadata !{}