diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5c398a8b693..a44e798f121 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -48,7 +48,6 @@ #include "llvm/Module.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LoopDependenceAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -107,8 +106,6 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addRequired(); @@ -125,7 +122,6 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) -INITIALIZE_PASS_DEPENDENCY(LoopDependenceAnalysis) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) @@ -167,6 +163,15 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, } while (!NowDeadInsts.empty()); } +/// deleteIfDeadInstruction - If the specified value is a dead instruction, +/// delete it and any recursively used instructions. +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { + if (Instruction *I = dyn_cast(V)) + if (isInstructionTriviallyDead(I, TLI)) + deleteDeadInstruction(I, SE, TLI); +} + bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; @@ -363,16 +368,35 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { MSI, Ev, BECount); } -/// hasDependence - Uses the LoopDependenceAnalysis to determine whether 'Inst' -/// depends on any other value in the Loop 'L'. -static bool hasDependence(Instruction *Inst, Loop *L, - LoopDependenceAnalysis &LDA) { + +/// mayLoopAccessLocation - Return true if the specified loop might access the +/// specified pointer location, which is a loop-strided access. The 'Access' +/// argument specifies what the verboten forms of access are (read or write). +static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, + Loop *L, const SCEV *BECount, + unsigned StoreSize, AliasAnalysis &AA, + Instruction *IgnoredStore) { + // Get the location that may be stored across the loop. Since the access is + // strided positively through memory, we say that the modified location starts + // at the pointer and has infinite size. + uint64_t AccessSize = AliasAnalysis::UnknownSize; + + // If the loop iterates a fixed number of times, we can refine the access size + // to be exactly the size of the memset, which is (BECount+1)*StoreSize + if (const SCEVConstant *BECst = dyn_cast(BECount)) + AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; + + // TODO: For this to be really effective, we have to dive into the pointer + // operand in the store. Store to &A[i] of 100 will always return may alias + // with store of &A[100], we need to StoreLoc to be "A" with size of 100, + // which will then no-alias a store to &A[100]. + AliasAnalysis::Location StoreLoc(Ptr, AccessSize); + for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; ++BI) for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) - if (&*I != Inst && I->mayReadOrWriteMemory() && - (I->mayWriteToMemory() || Inst->mayWriteToMemory()) && - LDA.depends(Inst, I)) + if (&*I != IgnoredStore && + (AA.getModRefInfo(I, StoreLoc) & Access)) return true; return false; @@ -450,11 +474,6 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, return false; } - // Make sure the store has no dependencies (i.e. other loads and stores) in - // the loop. - if (hasDependence(TheStore, CurLoop, getAnalysis())) - return false; - // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. @@ -463,13 +482,25 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, SCEVExpander Expander(*SE, "loop-idiom"); // Okay, we have a strided store "p[i]" of a splattable value. We can turn - // this into a memset in the loop preheader now if we want. + // this into a memset in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the aliased location. Check for any overlap by generating the + // base pointer and checking the region. unsigned AddrSpace = cast(DestPtr->getType())->getAddressSpace(); Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); + if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, + StoreSize, getAnalysis(), TheStore)){ + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(BasePtr, *SE, TLI); + return false; + } + // Okay, everything looks good, insert the memset. // The # stored bytes is (BECount+1)*Size. Expand the trip count out to @@ -532,14 +563,6 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, LoadInst *LI = cast(SI->getValueOperand()); - // Make sure the load and the store have no dependencies (i.e. other loads and - // stores) in the loop. - // FIXME: If we want to form a memmove SI and LI can be dependent but the - // distance must be positive. LDA doesn't provide that info currently. - LoopDependenceAnalysis &LDA = getAnalysis(); - if (hasDependence(SI, CurLoop, LDA) || hasDependence(LI, CurLoop, LDA)) - return false; - // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. @@ -548,16 +571,41 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, SCEVExpander Expander(*SE, "loop-idiom"); // Okay, we have a strided store "p[i]" of a loaded value. We can turn - // this into a memcpy in the loop preheader now if we want. + // this into a memcpy in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write the memory region we're storing to. This includes the load that + // feeds the stores. Check for an alias by generating the base address and + // checking everything. Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); + + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, StoreSize, + getAnalysis(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); + return false; + } + + // For a memcpy, we have to make sure that the input array is not being + // mutated by the loop. Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); + if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, + StoreSize, getAnalysis(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(LoadBasePtr, *SE, TLI); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); + return false; + } + // Okay, everything is safe, we can transform this! diff --git a/test/Transforms/LoopIdiom/multi-dimensional.ll b/test/Transforms/LoopIdiom/multi-dimensional.ll deleted file mode 100644 index 991f2688d76..00000000000 --- a/test/Transforms/LoopIdiom/multi-dimensional.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" - -%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32, %struct.zot } -%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] } - -define void @test1(%struct.ham* nocapture %arg) nounwind { -bb: - br label %bb1 - -bb1: ; preds = %bb11, %bb - %tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] - br label %bb2 - -bb2: ; preds = %bb2, %bb1 - %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ] - %tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 1, i64 %tmp, i64 %tmp3 - store i32 0, i32* %tmp4, align 4 - %tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 0, i64 %tmp, i64 %tmp3 - store i32 0, i32* %tmp5, align 4 - %tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 1, i64 %tmp, i64 %tmp3 - store i32 0, i32* %tmp6, align 4 - %tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 0, i64 %tmp, i64 %tmp3 - store i32 0, i32* %tmp7, align 4 - %tmp8 = add i64 %tmp3, 1 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = icmp eq i32 %tmp9, 8 - br i1 %tmp10, label %bb11, label %bb2 - -bb11: ; preds = %bb2 - %tmp12 = add i64 %tmp, 1 - %tmp13 = trunc i64 %tmp12 to i32 - %tmp14 = icmp eq i32 %tmp13, 16 - br i1 %tmp14, label %bb15, label %bb1 - -bb15: ; preds = %bb11 - ret void - -; CHECK: @test1 -; CHECK: bb1: -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 -; CHECK-NOT: store -; CHECK: br -} diff --git a/test/Transforms/LoopIdiom/sideeffect.ll b/test/Transforms/LoopIdiom/sideeffect.ll deleted file mode 100644 index 460e5233f95..00000000000 --- a/test/Transforms/LoopIdiom/sideeffect.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" - -; PR9481 -define i32 @test1() nounwind uwtable ssp { -entry: - %a = alloca [10 x i8], align 1 - br label %for.body - -for.cond1.preheader: ; preds = %for.body - %arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a, i64 0, i64 0 - %.pre = load i8* %arrayidx5.phi.trans.insert, align 1 - br label %for.body3 - -for.body: ; preds = %for.body, %entry - %indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30, %for.body ] - call void (...)* @bar() nounwind - %arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv29 - store i8 23, i8* %arrayidx, align 1 - %indvars.iv.next30 = add i64 %indvars.iv29, 1 - %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32 - %exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000 - br i1 %exitcond32, label %for.cond1.preheader, label %for.body - -for.body3: ; preds = %for.body3, %for.cond1.preheader - %0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ] - %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] - call void (...)* @bar() nounwind - %arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv - %1 = load i8* %arrayidx7, align 1 - %add = add i8 %1, %0 - store i8 %add, i8* %arrayidx7, align 1 - %indvars.iv.next = add i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, 1000000 - br i1 %exitcond, label %for.end12, label %for.body3 - -for.end12: ; preds = %for.body3 - %arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2 - %2 = load i8* %arrayidx13, align 1 - %conv14 = sext i8 %2 to i32 - %arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6 - %3 = load i8* %arrayidx15, align 1 - %conv16 = sext i8 %3 to i32 - %add17 = add nsw i32 %conv16, %conv14 - ret i32 %add17 - -; CHECK: @test1 -; CHECK-NOT: @llvm.memset -} - -declare void @bar(...)