Fix a bug in the vectorization of wide load/store operations.

We used SCEV to detect that the access A[X] is consecutive, but we assumed
that X was the induction variable itself. X can be any expression that uses
the induction variable, for example: X = i + 2.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166388 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2012-10-21 06:49:10 +00:00
parent c847872629
commit bb950854ac
2 changed files with 44 additions and 4 deletions

View File

@ -740,10 +740,15 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
break;
}
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0));
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
unsigned NumOperands = Gep->getNumOperands();
Gep2->setOperand(NumOperands - 1, Induction);
Gep2->setOperand(NumOperands - 1, LastIndex);
Ptr = Builder.Insert(Gep2);
Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
Value *Val = getVectorValue(SI->getValueOperand());
@ -764,10 +769,15 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
break;
}
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0));
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
unsigned NumOperands = Gep->getNumOperands();
Gep2->setOperand(NumOperands - 1, Induction);
Gep2->setOperand(NumOperands - 1, LastIndex);
Ptr = Builder.Insert(Gep2);
Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
LI = Builder.CreateLoad(Ptr);

View File

@ -0,0 +1,30 @@
; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
; Regression test for wide stores whose last GEP index is not the induction
; variable itself but an expression of it (here: induction + 12).
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; 1024-element i32 array that the loop below stores into.
@array = common global [1024 x i32] zeroinitializer, align 16
; The patterns below expect <4 x i64> add and trunc instructions in the output,
; plus a scalar `add i64 %index, 12`, i.e. the +12 offset must still be applied
; to the scalar index.
;CHECK: @array_at_plus_one
;CHECK: add <4 x i64>
;CHECK: trunc <4 x i64>
;CHECK: add i64 %index, 12
;CHECK: ret i32
; Stores the truncated loop counter into @array at index (counter + 12).
; Roughly: if (n > 0) { i = 0; do { array[i + 12] = (i32)i; i++; } while ((i32)i != n); }
; The return value is undef; only the stores matter.
define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
; Skip the loop entirely when n <= 0.
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0, %.lr.ph
; 64-bit induction variable, starting at 0 and stepping by 1.
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
; The store index is the induction variable plus a constant, not the induction
; variable itself.
%2 = add nsw i64 %indvars.iv, 12
%3 = getelementptr inbounds [1024 x i32]* @array, i64 0, i64 %2
; Value stored: the induction variable truncated to i32.
%4 = trunc i64 %indvars.iv to i32
store i32 %4, i32* %3, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the truncated, incremented induction variable equals n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph, %0
ret i32 undef
}