diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index e8337600727..f34ae69779e 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -738,7 +738,8 @@ namespace {
                             SmallVector<Instruction*, 4>& toErase);
     bool processNonLocalLoad(LoadInst* L,
                              SmallVector<Instruction*, 4>& toErase);
-    bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
+    bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
+                       SmallVector<Instruction*, 4>& toErase);
     bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
                                 SmallVector<Instruction*, 4>& toErase);
     Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
@@ -1111,24 +1112,10 @@ bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
 /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
 /// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
 ///  This allows later passes to remove the first memcpy altogether.
-bool GVN::processMemCpy(MemCpyInst* M,
+bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                         SmallVector<Instruction*, 4>& toErase) {
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  
-  // First, we have to check that the dependency is another memcpy
-  Instruction* dep = MD.getDependency(M);
-  if (dep == MemoryDependenceAnalysis::None ||
-      dep == MemoryDependenceAnalysis::NonLocal)
-    return false;
-  else if (CallInst* C = dyn_cast<CallInst>(dep))
-    if (!isa<MemCpyInst>(C))
-      return performReturnSlotOptzn(M, C, toErase);
-  else if (!isa<MemCpyInst>(dep))
-    return false;
-  
   // We can only transforms memcpy's where the dest of one is the source of the
   // other
-  MemCpyInst* MDep = cast<MemCpyInst>(dep);
   if (M->getSource() != MDep->getDest())
     return false;
   
@@ -1159,11 +1146,9 @@ bool GVN::processMemCpy(MemCpyInst* M,
     return false;
   
   // If all checks passed, then we can transform these memcpy's
-  bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
-  Function* MemMoveFun = Intrinsic::getDeclaration(
+  Function* MemCpyFun = Intrinsic::getDeclaration(
                                  M->getParent()->getParent()->getParent(),
-                                 is32bit ? Intrinsic::memcpy_i32 : 
-                                           Intrinsic::memcpy_i64);
+                                 M->getIntrinsicID());
     
   std::vector<Value*> args;
   args.push_back(M->getRawDest());
@@ -1171,8 +1156,9 @@ bool GVN::processMemCpy(MemCpyInst* M,
   args.push_back(M->getLength());
   args.push_back(M->getAlignment());
   
-  CallInst* C = new CallInst(MemMoveFun, args.begin(), args.end(), "", M);
+  CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M);
   
+  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
   if (MD.getDependency(C) == MDep) {
     MD.dropInstruction(M);
     toErase.push_back(M);
@@ -1193,7 +1179,22 @@ bool GVN::processInstruction(Instruction* I,
   if (LoadInst* L = dyn_cast<LoadInst>(I)) {
     return processLoad(L, lastSeenLoad, toErase);
   } else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
-    return processMemCpy(M, toErase);
+    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+    // There are two possible optimizations we can do for memcpy:
+    //   a) memcpy-memcpy xform which exposes redundancy for DSE
+    //   b) call-memcpy xform for sret return slot optimization
+    Instruction* dep = MD.getDependency(M);
+    if (dep == MemoryDependenceAnalysis::None ||
+        dep == MemoryDependenceAnalysis::NonLocal)
+      return false;
+    else if (CallInst* C = dyn_cast<CallInst>(dep)) {
+      if (!isa<MemCpyInst>(C))
+        return performReturnSlotOptzn(M, C, toErase);
+    } else if (!isa<MemCpyInst>(dep))
+      return false;
+    
+    return processMemCpy(M, cast<MemCpyInst>(dep), toErase);
   }
   
   unsigned num = VN.lookup_or_add(I);