diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9709dfcc1f6..be524be0082 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -634,22 +634,26 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
     if (destSize < srcSize)
       return false;
   } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
-    // If the destination is an sret parameter then only accesses that are
-    // outside of the returned struct type can trap.
-    if (!A->hasStructRetAttr())
-      return false;
+    // Skip the sret-based size check when the argument is already known to
+    // be dereferenceable for at least srcSize bytes.
+    if (A->getDereferenceableBytes() < srcSize) {
+      // If the destination is an sret parameter then only accesses that are
+      // outside of the returned struct type can trap.
+      if (!A->hasStructRetAttr())
+        return false;
 
-    Type *StructTy = cast<PointerType>(A->getType())->getElementType();
-    if (!StructTy->isSized()) {
-      // The call may never return and hence the copy-instruction may never
-      // be executed, and therefore it's not safe to say "the destination
-      // has at least <cpyLen> bytes, as implied by the copy-instruction",
-      return false;
+      Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+      if (!StructTy->isSized()) {
+        // The call may never return and hence the copy-instruction may never
+        // be executed, and therefore it's not safe to say "the destination
+        // has at least <cpyLen> bytes, as implied by the copy-instruction",
+        return false;
+      }
+
+      uint64_t destSize = DL->getTypeAllocSize(StructTy);
+      if (destSize < srcSize)
+        return false;
     }
-
-    uint64_t destSize = DL->getTypeAllocSize(StructTy);
-    if (destSize < srcSize)
-      return false;
   } else {
     return false;
   }
diff --git a/test/Transforms/MemCpyOpt/callslot_deref.ll b/test/Transforms/MemCpyOpt/callslot_deref.ll
new file mode 100644
index 00000000000..4d51552d015
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/callslot_deref.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; all bytes of %dst that are touched by the memset are dereferenceable
+define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
+; CHECK-LABEL: @must_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}
+
+; memset touches more bytes than those guaranteed to be dereferenceable
+define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
+; CHECK-LABEL: @must_not_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}