diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 72127bb0d6e..550757a09f5 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -876,6 +876,12 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy, IRBuilder<> Builder(MemCpy->getNextNode()); + // If the sizes have different types (i32 vs i64), promote both to i64. + if (DestSize->getType() != SrcSize->getType()) { + DestSize = Builder.CreateZExt(DestSize, Builder.getInt64Ty()); + SrcSize = Builder.CreateZExt(SrcSize, Builder.getInt64Ty()); + } + Value *MemsetLen = Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize), ConstantInt::getNullValue(DestSize->getType()), diff --git a/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll index 5fa5a915a5a..cc94f58dbfb 100644 --- a/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ b/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -8,14 +8,44 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 0, i64 [[SIZE]], i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) ; CHECK-NEXT: ret void -define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) { - call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) +define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) { + call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ret void } +; CHECK-LABEL: define void @test_different_types_i32_i64 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) +; CHECK-DAG: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i64 +; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size +; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 [[DSTSIZE]], %src_size +; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 [[DSTSIZE]], %src_size +; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) +; CHECK-NEXT: ret void +define void @test_different_types_i32_i64(i8* %dst, i8* %src, i32 %dst_size, i64 %src_size, i8 %c) { + call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: define void @test_different_types_i64_i32 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) +; CHECK-DAG: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i64 +; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 [[SRCSIZE]] +; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, [[SRCSIZE]] +; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, [[SRCSIZE]] +; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) +; CHECK-NEXT: ret void +define void @test_different_types_i64_i32(i8* %dst, i8* %src, i64 %dst_size, i32 %src_size, i8 %c) { + call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) + ret void +} + ; CHECK-LABEL: define void @test_align_same ; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false) define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) { @@ -52,3 +82,5 @@ define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i6 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)