MemCpyOpt: only look through lifetime markers when scanning uses of the
call-slot source alloca.

In performCallSlotOptzn, the worklist scan over users of the source alloca
is restructured from an if / else-if chain into guard clauses:

  * bitcast / addrspacecast users: queue their users and continue;
  * GEP users: bail out unless all indices are zero, otherwise queue their
    users and continue;
  * lifetime.start / lifetime.end users: skip;
  * any other user must be the call (C) or the memcpy (cpy), otherwise the
    transform is rejected.

The old chain accepted every IntrinsicInst user: non-lifetime intrinsics hit
`continue`, and lifetime intrinsics fell through to the end of the loop body.
As a result, an intrinsic such as a second memset of the alloca never reached
the `U != C && U != cpy` check.

The test names its values and gains @bar, where the alloca is memset again
between two memcpys; the CHECK lines require that both memsets and both
memcpys of the alloca survive.

diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index b1a2ed39f24..c750ece41b4 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -673,19 +673,23 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
     if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
       for (User *UU : U->users())
         srcUseList.push_back(UU);
-    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
-      if (G->hasAllZeroIndices())
-        for (User *UU : U->users())
-          srcUseList.push_back(UU);
-      else
-        return false;
-    } else if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) {
-      if (IT->getIntrinsicID() != Intrinsic::lifetime_start &&
-          IT->getIntrinsicID() != Intrinsic::lifetime_end)
-        continue;
-    } else if (U != C && U != cpy) {
-      return false;
+      continue;
     }
+    if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
+      if (!G->hasAllZeroIndices())
+        return false;
+
+      for (User *UU : U->users())
+        srcUseList.push_back(UU);
+      continue;
+    }
+    if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
+      if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
+          IT->getIntrinsicID() == Intrinsic::lifetime_end)
+        continue;
+
+    if (U != C && U != cpy)
+      return false;
   }
 
   // Check that src isn't captured by the called function since the
diff --git a/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
index d702f3ff852..626317614c9 100644
--- a/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
+++ b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
@@ -3,20 +3,47 @@
 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64)) {
+define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64) %sret) {
 entry-block:
   %a = alloca [8 x i64], align 8
-  %1 = bitcast [8 x i64]* %a to i8*
-  call void @llvm.lifetime.start(i64 64, i8* %1)
-  call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 64, i32 8, i1 false)
-  %2 = bitcast [8 x i64]* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 64, i32 8, i1 false)
-  call void @llvm.lifetime.end(i64 64, i8* %1)
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.lifetime.end(i64 64, i8* %a.cast)
   ret void
 
 ; CHECK-LABEL: @foo(
-; CHECK: %1 = bitcast
-; CHECK: call void @llvm.memset
+; CHECK: %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+define void @bar([8 x i64]* noalias nocapture sret dereferenceable(64) %sret, [8 x i64]* noalias nocapture dereferenceable(64) %out) {
+entry-block:
+  %a = alloca [8 x i64], align 8
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 42, i64 32, i32 8, i1 false)
+  %out.cast = bitcast [8 x i64]* %out to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.lifetime.end(i64 64, i8* %a.cast)
+  ret void
+
+; CHECK-LABEL: @bar(
+; CHECK: %[[a:[^=]+]] = alloca [8 x i64]
+; CHECK: %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64
+; CHECK: %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[sret_cast]], i8* %[[a_cast]], i64 64
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32
+; CHECK: %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64
 ; CHECK-NOT: call void @llvm.memcpy
 ; CHECK: ret void
 }