add support for forwarding mem intrinsic values to non-local loads.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90697 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 01:31:05 +00:00 · 2009-12-06 04:54:31 +00:00 · 2009-12-06 04:54:31 +00:00 · cb9cbc4949
commit cb9cbc4949
parent 0fce29b9d1
2 changed files with 97 additions and 33 deletions
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@ -1192,19 +1192,47 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
 struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
+  enum ValType {
+    SimpleVal,  // A simple offsetted value that is accessed.
+    MemIntrin   // A memory intrinsic which is loaded from.
+  };
+  
  /// V - The value that is live out of the block.
-  Value *V;
-  /// Offset - The byte offset in V that is interesting for the load query.
+  PointerIntPair<Value *, 1, ValType> Val;
+  
+  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;
  
  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
-    Res.V = V;
+    Res.Val.setPointer(V);
+    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }
+
+  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+                                     unsigned Offset = 0) {
+    AvailableValueInBlock Res;
+    Res.BB = BB;
+    Res.Val.setPointer(MI);
+    Res.Val.setInt(MemIntrin);
+    Res.Offset = Offset;
+    return Res;
+  }
+  
+  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+  Value *getSimpleValue() const {
+    assert(isSimpleValue() && "Wrong accessor");
+    return Val.getPointer();
+  }
+  
+  MemIntrinsic *getMemIntrinValue() const {
+    assert(!isSimpleValue() && "Wrong accessor");
+    return cast<MemIntrinsic>(Val.getPointer());
+  }
 };

 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@ -1221,30 +1249,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
  const Type *LoadTy = LI->getType();
  
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
-    BasicBlock *BB = ValuesPerBlock[i].BB;
-    Value *AvailableVal = ValuesPerBlock[i].V;
-    unsigned Offset = ValuesPerBlock[i].Offset;
+    const AvailableValueInBlock &AV = ValuesPerBlock[i];
+    BasicBlock *BB = AV.BB;
    
    if (SSAUpdate.HasValueForBlock(BB))
      continue;
-    
-    if (AvailableVal->getType() != LoadTy) {
-      assert(TD && "Need target data to handle type mismatch case");
-      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
-                                          BB->getTerminator(), *TD);
-      
-      if (Offset) {
-        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-              << *ValuesPerBlock[i].V << '\n'
+
+    unsigned Offset = AV.Offset;
+
+    Value *AvailableVal;
+    if (AV.isSimpleValue()) {
+      AvailableVal = AV.getSimpleValue();
+      if (AvailableVal->getType() != LoadTy) {
+        assert(TD && "Need target data to handle type mismatch case");
+        AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+                                            BB->getTerminator(), *TD);
+        
+        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
+              << *AV.getSimpleValue() << '\n'
              << *AvailableVal << '\n' << "\n\n\n");
      }
-      
-      
-      DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-            << *ValuesPerBlock[i].V << '\n'
+    } else {
+      AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
+                                            LoadTy, BB->getTerminator(), *TD);
+      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+            << "  " << *AV.getMemIntrinValue() << '\n'
            << *AvailableVal << '\n' << "\n\n\n");
    }
-    
    SSAUpdate.AddAvailableValue(BB, AvailableVal);
  }
  
@ -1324,19 +1355,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
        }
      }

-#if 0
      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
-      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD) {
-          int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
-          if (Offset != -1)
-            AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+          int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
+          if (Offset != -1) {
+            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+                                                                  Offset));
+            continue;
+          }            
        }
      }
-#endif
      
      UnavailableBlocks.push_back(DepBB);
      continue;
@ -1462,19 +1494,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
  // to eliminate LI even if we insert uses in the other predecessors, we will
  // end up increasing code size.  Reject this by scanning for LI.
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-    if (ValuesPerBlock[i].V == LI)
+    if (ValuesPerBlock[i].isSimpleValue() &&
+        ValuesPerBlock[i].getSimpleValue() == LI)
      return false;

+  // FIXME: It is extremely unclear what this loop is doing, other than
+  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
-    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
+    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+      const AvailableValueInBlock &AV = ValuesPerBlock[i];
+      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
-        if (DT->dominates(LI, I)) {
-          isHot = true;
-          break;
-        }
+        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+          if (DT->dominates(LI, I)) {
+            isHot = true;
+            break;
+          }
+    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@ -163,6 +163,31 @@ entry:
 ; CHECK-NEXT: ret float
 }

+;; non-local memset -> i16 load forwarding.
+define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
+  %P3 = bitcast i16* %P to i8*
+  br i1 %cond, label %T, label %F
+T:
+  tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
+  br label %Cont
+  
+F:
+  tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
+  br label %Cont
+
+Cont:
+  %P2 = getelementptr i16* %P, i32 4
+  %A = load i16* %P2
+  ret i16 %A
+
+; CHECK: @memset_to_i16_nonlocal0
+; CHECK: Cont:
+; CHECK-NEXT:   %A = phi i16 [ 514, %F ], [ 257, %T ]
+; CHECK-NOT: load
+; CHECK: ret i16 %A
+}
+
+
 declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind


@ -192,6 +217,7 @@ Cont:
 ; CHECK: ret i8 %A
 }

+
 ;; non-local i32/float -> i8 load forwarding.  This also tests that the "P3"
 ;; bitcast equivalence can be properly phi translated.
 define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {