Revert r166390 "LoopIdiom: Replace custom dependence analysis with LoopDependenceAnalysis."

It passes all tests, produces better results than the old code but uses the wrong pass, LoopDependenceAnalysis, which is old and unmaintained. "Why is it still in tree?", you might ask. The answer is obviously: "To confuse developers." Just swapping in the new dependency pass sends the pass manager into an infinte loop, I'll try to figure out why tomorrow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166399 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-24 22:24:54 +00:00 · 2012-10-21 19:31:16 +00:00
parent 56acf63e35
commit 3740e798bc
3 changed files with 74 additions and 128 deletions
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -48,7 +48,6 @@
 #include "llvm/Module.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LoopDependenceAnalysis.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -107,8 +106,6 @@ namespace {
      AU.addPreserved<AliasAnalysis>();
      AU.addRequired<ScalarEvolution>();
      AU.addPreserved<ScalarEvolution>();
-      AU.addRequired<LoopDependenceAnalysis>();
-      AU.addPreserved<LoopDependenceAnalysis>();
      AU.addPreserved<DominatorTree>();
      AU.addRequired<DominatorTree>();
      AU.addRequired<TargetLibraryInfo>();
@@ -125,7 +122,6 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(LCSSA)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopDependenceAnalysis)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                    false, false)
@@ -167,6 +163,15 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
  } while (!NowDeadInsts.empty());
 }

+/// deleteIfDeadInstruction - If the specified value is a dead instruction,
+/// delete it and any recursively used instructions.
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
+                                    const TargetLibraryInfo *TLI) {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    if (isInstructionTriviallyDead(I, TLI))
+      deleteDeadInstruction(I, SE, TLI);
+}
+
 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
  CurLoop = L;

@@ -363,16 +368,35 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
                                 MSI, Ev, BECount);
 }

-/// hasDependence - Uses the LoopDependenceAnalysis to determine whether 'Inst'
-/// depends on any other value in the Loop 'L'.
-static bool hasDependence(Instruction *Inst, Loop *L,
-                          LoopDependenceAnalysis &LDA) {
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access.  The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
+                                  Loop *L, const SCEV *BECount,
+                                  unsigned StoreSize, AliasAnalysis &AA,
+                                  Instruction *IgnoredStore) {
+  // Get the location that may be stored across the loop.  Since the access is
+  // strided positively through memory, we say that the modified location starts
+  // at the pointer and has infinite size.
+  uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+  // If the loop iterates a fixed number of times, we can refine the access size
+  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
+  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+
+  // TODO: For this to be really effective, we have to dive into the pointer
+  // operand in the store.  Store to &A[i] of 100 will always return may alias
+  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
+  // which will then no-alias a store to &A[100].
+  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
-      if (&*I != Inst && I->mayReadOrWriteMemory() &&
-          (I->mayWriteToMemory() || Inst->mayWriteToMemory()) &&
-          LDA.depends(Inst, I))
+      if (&*I != IgnoredStore &&
+          (AA.getModRefInfo(I, StoreLoc) & Access))
        return true;

  return false;
@@ -450,11 +474,6 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
    return false;
  }

-  // Make sure the store has no dependencies (i.e. other loads and stores) in
-  // the loop.
-  if (hasDependence(TheStore, CurLoop, getAnalysis<LoopDependenceAnalysis>()))
-    return false;
-
  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header.  This allows us to insert code for it in the preheader.
@@ -463,13 +482,25 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
-  // this into a memset in the loop preheader now if we want.
+  // this into a memset in the loop preheader now if we want.  However, this
+  // would be unsafe to do if there is anything else in the loop that may read
+  // or write to the aliased location.  Check for any overlap by generating the
+  // base pointer and checking the region.
  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
  Value *BasePtr =
    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                           Preheader->getTerminator());


+  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
+                            CurLoop, BECount,
+                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+    Expander.clear();
+    // If we generated new code for the base pointer, clean up.
+    deleteIfDeadInstruction(BasePtr, *SE, TLI);
+    return false;
+  }
+
  // Okay, everything looks good, insert the memset.

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
@@ -532,14 +563,6 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,

  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());

-  // Make sure the load and the store have no dependencies (i.e. other loads and
-  // stores) in the loop.
-  // FIXME: If we want to form a memmove SI and LI can be dependent but the
-  // distance must be positive. LDA doesn't provide that info currently.
-  LoopDependenceAnalysis &LDA = getAnalysis<LoopDependenceAnalysis>();
-  if (hasDependence(SI, CurLoop, LDA) || hasDependence(LI, CurLoop, LDA))
-    return false;
-
  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header.  This allows us to insert code for it in the preheader.
@@ -548,16 +571,41 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
-  // this into a memcpy in the loop preheader now if we want.
+  // this into a memcpy in the loop preheader now if we want.  However, this
+  // would be unsafe to do if there is anything else in the loop that may read
+  // or write the memory region we're storing to.  This includes the load that
+  // feeds the stores.  Check for an alias by generating the base address and
+  // checking everything.
  Value *StoreBasePtr =
    Expander.expandCodeFor(StoreEv->getStart(),
                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                           Preheader->getTerminator());
+
+  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
+                            CurLoop, BECount, StoreSize,
+                            getAnalysis<AliasAnalysis>(), SI)) {
+    Expander.clear();
+    // If we generated new code for the base pointer, clean up.
+    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+    return false;
+  }
+
+  // For a memcpy, we have to make sure that the input array is not being
+  // mutated by the loop.
  Value *LoadBasePtr =
    Expander.expandCodeFor(LoadEv->getStart(),
                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                           Preheader->getTerminator());

+  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
+                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+    Expander.clear();
+    // If we generated new code for the base pointer, clean up.
+    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
+    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+    return false;
+  }
+
  // Okay, everything is safe, we can transform this!


--- a/test/Transforms/LoopIdiom/multi-dimensional.ll
+++ b/test/Transforms/LoopIdiom/multi-dimensional.ll
@@ -1,49 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32, %struct.zot }
-%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] }
-
-define void @test1(%struct.ham* nocapture %arg) nounwind {
-bb:
-  br label %bb1
-
-bb1:                                              ; preds = %bb11, %bb
-  %tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
-  br label %bb2
-
-bb2:                                              ; preds = %bb2, %bb1
-  %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ]
-  %tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 1, i64 %tmp, i64 %tmp3
-  store i32 0, i32* %tmp4, align 4
-  %tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 0, i64 %tmp, i64 %tmp3
-  store i32 0, i32* %tmp5, align 4
-  %tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 1, i64 %tmp, i64 %tmp3
-  store i32 0, i32* %tmp6, align 4
-  %tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 0, i64 %tmp, i64 %tmp3
-  store i32 0, i32* %tmp7, align 4
-  %tmp8 = add i64 %tmp3, 1
-  %tmp9 = trunc i64 %tmp8 to i32
-  %tmp10 = icmp eq i32 %tmp9, 8
-  br i1 %tmp10, label %bb11, label %bb2
-
-bb11:                                             ; preds = %bb2
-  %tmp12 = add i64 %tmp, 1
-  %tmp13 = trunc i64 %tmp12 to i32
-  %tmp14 = icmp eq i32 %tmp13, 16
-  br i1 %tmp14, label %bb15, label %bb1
-
-bb15:                                             ; preds = %bb11
-  ret void
-
-; CHECK: @test1
-; CHECK: bb1:
-; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64
-; CHECK-NOT: store
-; CHECK: br
-}
--- a/test/Transforms/LoopIdiom/sideeffect.ll
+++ b/test/Transforms/LoopIdiom/sideeffect.ll
@@ -1,53 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-; PR9481
-define i32 @test1() nounwind uwtable ssp {
-entry:
-  %a = alloca [10 x i8], align 1
-  br label %for.body
-
-for.cond1.preheader:                              ; preds = %for.body
-  %arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a, i64 0, i64 0
-  %.pre = load i8* %arrayidx5.phi.trans.insert, align 1
-  br label %for.body3
-
-for.body:                                         ; preds = %for.body, %entry
-  %indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30, %for.body ]
-  call void (...)* @bar() nounwind
-  %arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv29
-  store i8 23, i8* %arrayidx, align 1
-  %indvars.iv.next30 = add i64 %indvars.iv29, 1
-  %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
-  %exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000
-  br i1 %exitcond32, label %for.cond1.preheader, label %for.body
-
-for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
-  %0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ]
-  %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
-  call void (...)* @bar() nounwind
-  %arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv
-  %1 = load i8* %arrayidx7, align 1
-  %add = add i8 %1, %0
-  store i8 %add, i8* %arrayidx7, align 1
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 1000000
-  br i1 %exitcond, label %for.end12, label %for.body3
-
-for.end12:                                        ; preds = %for.body3
-  %arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2
-  %2 = load i8* %arrayidx13, align 1
-  %conv14 = sext i8 %2 to i32
-  %arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6
-  %3 = load i8* %arrayidx15, align 1
-  %conv16 = sext i8 %3 to i32
-  %add17 = add nsw i32 %conv16, %conv14
-  ret i32 %add17
-
-; CHECK: @test1
-; CHECK-NOT: @llvm.memset
-}
-
-declare void @bar(...)