From 89e7b356f270e29c2e9e18c6bbd30e5925585f06 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 19 Oct 2012 01:24:18 +0000 Subject: [PATCH] vectorizer: Add support for reading and writing from the same memory location. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166255 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 11 +++++-- test/Transforms/LoopVectorize/increment.ll | 35 ++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/LoopVectorize/increment.ll diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f5c9bb31e05..5152ec11e5b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -715,6 +715,7 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { ValueVector Reads; ValueVector Writes; + SmallPtrSet AnalyzedPtrs; unsigned NumPhis = 0; for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { Instruction *I = it; @@ -766,7 +767,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { DEBUG(dbgs() << "LV: Found a non-simple load.\n"); return false; } - GetUnderlyingObjects(Ld->getPointerOperand(), Reads, DL); + + Value* Ptr = Ld->getPointerOperand(); + if (AnalyzedPtrs.insert(Ptr)) + GetUnderlyingObjects(Ptr, Reads, DL); } // Record store pointers. Abort on all other instructions that write to @@ -778,7 +782,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { DEBUG(dbgs() << "LV: Found a non-simple store.\n"); return false; } - GetUnderlyingObjects(St->getPointerOperand(), Writes, DL); + + Value* Ptr = St->getPointerOperand(); + if (AnalyzedPtrs.insert(Ptr)) + GetUnderlyingObjects(St->getPointerOperand(), Writes, DL); } // We still don't handle functions. 
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll new file mode 100644 index 00000000000..e944a9af92d --- /dev/null +++ b/test/Transforms/LoopVectorize/increment.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 + +; This is the loop. +; for (i=0; i<n; i++){ +; a[i] += i; +; } +;CHECK: define void @inc +;CHECK: load <4 x i32> +;CHECK: add <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @inc(i32 %n) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = trunc i64 %indvars.iv to i32 + %5 = add nsw i32 %3, %4 + store i32 %5, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret void +}