diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 56a35112aa1..9878a2e4e83 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -193,8 +193,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
     return processLoopStoreOfSplatValue(SI, StoreSize, SplatValue, Ev, BECount);
 
   // Handle the memcpy case here.
-  errs() << "Found strided store: " << *Ev << "\n";
-
+  // errs() << "Found strided store: " << *Ev << "\n";
   return false;
 }
 
@@ -202,13 +201,23 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
 /// mayLoopModRefLocation - Return true if the specified loop might do a load or
 /// store to the same location that the specified store could store to, which is
 /// a loop-strided access.
-static bool mayLoopModRefLocation(StoreInst *SI, Loop *L, AliasAnalysis &AA) {
+static bool mayLoopModRefLocation(StoreInst *SI, Loop *L, const SCEV *BECount,
+                                  unsigned StoreSize, AliasAnalysis &AA) {
   // Get the location that may be stored across the loop. Since the access is
   // strided positively through memory, we say that the modified location starts
   // at the pointer and has infinite size.
-  // TODO: Could improve this for constant trip-count loops.
-  AliasAnalysis::Location StoreLoc =
-    AliasAnalysis::Location(SI->getPointerOperand());
+  uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+  // If the loop iterates a fixed number of times, we can refine the access size
+  // to be exactly the size of the memset, which is (BECount+1)*StoreSize.
+  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+
+  // TODO: For this to be really effective, we have to dive into the pointer
+  // operand in the store. Store to &A[i] of 100 will always return may alias
+  // with store of &A[100]; we need StoreLoc to be "A" with size of 100,
+  // which will then no-alias a store to &A[100].
+  AliasAnalysis::Location StoreLoc(SI->getPointerOperand(), AccessSize);
 
   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
        ++BI)
@@ -234,7 +243,8 @@ processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
   // this into a memset in the loop preheader now if we want. However, this
   // would be unsafe to do if there is anything else in the loop that may read
   // or write to the aliased location. Check for an alias.
-  bool Unsafe=mayLoopModRefLocation(SI, CurLoop, getAnalysis<AliasAnalysis>());
+  bool Unsafe = mayLoopModRefLocation(SI, CurLoop, BECount, StoreSize,
+                                      getAnalysis<AliasAnalysis>());
 
   SI->insertBefore(InstAfterStore);
 
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 7f96b3c669c..ef37b1dfdf7 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-idiom < %s -S | FileCheck %s
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
@@ -65,3 +65,29 @@ for.end: ; preds = %entry
 ; CHECK-NOT: memset
 ; CHECK: ret void
 }
+
+
+;; TODO: We should be able to promote this memset. Not yet though.
+define void @test4(i8* %Base) nounwind ssp {
+bb.nph: ; preds = %entry
+  %Base100 = getelementptr i8* %Base, i64 1000
+  br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  store i8 0, i8* %I.0.014, align 1
+
+  ;; Store beyond the range of the memset, should be safe to promote.
+  store i8 42, i8* %Base100
+
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+; CHECK-TODO: @test4
+; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
+; CHECK-TODO-NOT: store
+}