diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index c2844375c44..4210f0d9b79 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -322,6 +322,7 @@ namespace { // Helper fuctions bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); + bool processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI); bool processMemCpy(MemCpyInst *M); bool processMemMove(MemMoveInst *M); bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, @@ -511,6 +512,17 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { return false; } +bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { + // See if there is another memset or store neighboring this memset which + // allows us to widen out the memset to do a single larger store. + if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), + MSI->getValue())) { + BBI = I; // Don't invalidate iterator. 
+ return true; + } + return false; +} + /// performCallSlotOptzn - takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having @@ -775,6 +787,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { return true; } } + return false; } @@ -884,11 +897,13 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) MadeChange |= processStore(SI, BI); - else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) { + else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) + RepeatInstruction = processMemSet(M, BI); + else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) RepeatInstruction = processMemCpy(M); - } else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) { + else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) RepeatInstruction = processMemMove(M); - } else if (CallSite CS = (Value*)I) { + else if (CallSite CS = (Value*)I) { for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) if (CS.paramHasAttr(i+1, Attribute::ByVal)) MadeChange |= processByValArgument(CS, i); diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll index 523615d72d2..1ac97e9e6b9 100644 --- a/test/Transforms/MemCpyOpt/form-memset.ll +++ b/test/Transforms/MemCpyOpt/form-memset.ll @@ -164,6 +164,7 @@ entry: declare void @foo(%struct.MV*, %struct.MV*, i8*) +; Store followed by memset. define void @test3(i32* nocapture %P) nounwind ssp { entry: %arrayidx = getelementptr inbounds i32* %P, i64 1 @@ -177,6 +178,7 @@ entry: ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false) } +; Store followed by memset, different offset scenario. define void @test4(i32* nocapture %P) nounwind ssp { entry: store i32 0, i32* %P, align 4 @@ -191,4 +193,30 @@ entry: declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +; Memset followed by store. 
+define void @test5(i32* nocapture %P) nounwind ssp { +entry: + %add.ptr = getelementptr inbounds i32* %P, i64 2 + %0 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false) + %arrayidx = getelementptr inbounds i32* %P, i64 1 + store i32 0, i32* %arrayidx, align 4 + ret void +; CHECK: @test5 +; CHECK-NOT: store +; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false) +} + +; Memset followed by memset. +define void @test6(i32* nocapture %P) nounwind ssp { +entry: + %0 = bitcast i32* %P to i8* + tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false) + %add.ptr = getelementptr inbounds i32* %P, i64 3 + %1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false) + ret void +; CHECK: @test6 +; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false) +}