diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index e8337600727..f34ae69779e 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -738,7 +738,8 @@ namespace { SmallVector& toErase); bool processNonLocalLoad(LoadInst* L, SmallVector& toErase); - bool processMemCpy(MemCpyInst* M, SmallVector& toErase); + bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep, + SmallVector& toErase); bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C, SmallVector& toErase); Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig, @@ -1111,24 +1112,10 @@ bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C, /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be /// a memcpy from X to Z (or potentially a memmove, depending on circumstances). /// This allows later passes to remove the first memcpy altogether. -bool GVN::processMemCpy(MemCpyInst* M, +bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep, SmallVector& toErase) { - MemoryDependenceAnalysis& MD = getAnalysis(); - - // First, we have to check that the dependency is another memcpy - Instruction* dep = MD.getDependency(M); - if (dep == MemoryDependenceAnalysis::None || - dep == MemoryDependenceAnalysis::NonLocal) - return false; - else if (CallInst* C = dyn_cast(dep)) - if (!isa(C)) - return performReturnSlotOptzn(M, C, toErase); - else if (!isa(dep)) - return false; - // We can only transforms memcpy's where the dest of one is the source of the // other - MemCpyInst* MDep = cast(dep); if (M->getSource() != MDep->getDest()) return false; @@ -1159,11 +1146,9 @@ bool GVN::processMemCpy(MemCpyInst* M, return false; // If all checks passed, then we can transform these memcpy's - bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32; - Function* MemMoveFun = Intrinsic::getDeclaration( + Function* MemCpyFun = Intrinsic::getDeclaration( M->getParent()->getParent()->getParent(), - is32bit ? 
Intrinsic::memcpy_i32 : - Intrinsic::memcpy_i64); + M->getIntrinsicID()); std::vector args; args.push_back(M->getRawDest()); @@ -1171,8 +1156,9 @@ bool GVN::processMemCpy(MemCpyInst* M, args.push_back(M->getLength()); args.push_back(M->getAlignment()); - CallInst* C = new CallInst(MemMoveFun, args.begin(), args.end(), "", M); + CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M); + MemoryDependenceAnalysis& MD = getAnalysis(); if (MD.getDependency(C) == MDep) { MD.dropInstruction(M); toErase.push_back(M); @@ -1193,7 +1179,22 @@ bool GVN::processInstruction(Instruction* I, if (LoadInst* L = dyn_cast(I)) { return processLoad(L, lastSeenLoad, toErase); } else if (MemCpyInst* M = dyn_cast(I)) { - return processMemCpy(M, toErase); + MemoryDependenceAnalysis& MD = getAnalysis(); + + // There are two possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundancy for DSE + // b) call-memcpy xform for sret return slot optimization + Instruction* dep = MD.getDependency(M); + if (dep == MemoryDependenceAnalysis::None || + dep == MemoryDependenceAnalysis::NonLocal) + return false; + else if (CallInst* C = dyn_cast(dep)) { + if (!isa(C)) + return performReturnSlotOptzn(M, C, toErase); + } else if (!isa(dep)) + return false; + + return processMemCpy(M, cast(dep), toErase); } unsigned num = VN.lookup_or_add(I);