scalarrepl: handle memcpy/memmove that covers a whole alloca

CanConvertToScalar: a MemTransferInst user whose constant length equals
AllocaSize, at Offset 0, no longer blocks conversion; it marks the
conversion as non-trivial (IsNotTrivial = true).

ConvertUsesToScalar: each such transfer is rewritten and then erased:
  * source is not the alloca   -> load through the bitcasted source pointer,
                                  store into NewAI
  * dest is not the alloca     -> load from NewAI, store through the
                                  bitcasted destination pointer
  * both sides are the alloca  -> copy-to-self, nothing is emitted
The load/store that touches the foreign pointer inherits the transfer's
alignment.

Tests: 2008-06-22-LargeArray.ll grows its temporary from 100 to 200 bytes
and drops the unused "alloca point" bitcast; vector_memcpy.ll (new) greps
the scalarrepl output for `ret <16 x float> %A`.

NOTE(review): this text was recovered from a copy in which line breaks were
collapsed and <...> tokens resembling markup were dropped. The template
arguments dyn_cast<MemTransferInst>, dyn_cast<ConstantInt> and
cast<AllocaInst> follow from the declared variable types. The context-line
isa<DbgInfoIntrinsic> and the <i32>/<i8*> IR type comments are inferred --
confirm against the upstream commit. Leading whitespace inside hunks is a
best guess; apply with whitespace-insensitive matching if strict apply fails.

diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 644625f56a0..98d5a027013 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1356,6 +1356,16 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
         continue;
       }
     }
+
+    // If this is a memcpy or memmove into or out of the whole allocation, we
+    // can handle it like a load or store of the scalar type.
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
+        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
+          IsNotTrivial = true;
+          continue;
+        }
+    }
 
     // Ignore dbg intrinsic.
     if (isa<DbgInfoIntrinsic>(User))
@@ -1440,6 +1450,44 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
       MSI->eraseFromParent();
       continue;
     }
+
+    // If this is a memcpy or memmove into or out of the whole allocation, we
+    // can handle it like a load or store of the scalar type.
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+      assert(Offset == 0 && "must be store to start of alloca");
+
+      // If the source and destination are both to the same alloca, then this is
+      // a noop copy-to-self, just delete it. Otherwise, emit a load and store
+      // as appropriate.
+      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());
+
+      if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
+        // Dest must be OrigAI, change this to be a load from the original
+        // pointer (bitcasted), then a store to our new alloca.
+        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+        Value *SrcPtr = MTI->getSource();
+        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
+
+        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+        SrcVal->setAlignment(MTI->getAlignment());
+        Builder.CreateStore(SrcVal, NewAI);
+      } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
+        // Src must be OrigAI, change this to be a load from NewAI then a store
+        // through the original dest pointer (bitcasted).
+        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+        NewStore->setAlignment(MTI->getAlignment());
+      } else {
+        // Noop transfer. Src == Dst
+      }
+
+
+      MTI->eraseFromParent();
+      continue;
+    }
 
     // If user is a dbg info intrinsic then it is safe to remove it.
     if (isa<DbgInfoIntrinsic>(User)) {
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index e90dc024d7a..8fbbb6749a9 100644
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -6,12 +6,11 @@ target triple = "i386-apple-darwin8"
 
 define void @memtest1(i8* %dst, i8* %src) nounwind {
 entry:
-	%temp = alloca [100 x i8]		; <[100 x i8]*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%temp1 = bitcast [100 x i8]* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 100, i32 1 )
-	%temp3 = bitcast [100 x i8]* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 100, i32 1 )
+	%temp = alloca [200 x i8]		; <[100 x i8]*> [#uses=2]
+	%temp1 = bitcast [200 x i8]* %temp to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 200, i32 1 )
+	%temp3 = bitcast [200 x i8]* %temp to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 200, i32 1 )
 	ret void
 }
 
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
new file mode 100644
index 00000000000..dc947b0d715
--- /dev/null
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret <16 x float> %A}
+define <16 x float> @foo(<16 x float> %A) nounwind {
+	%tmp = alloca <16 x float>, align 16
+	%tmp2 = alloca <16 x float>, align 16
+	store <16 x float> %A, <16 x float>* %tmp
+	%s = bitcast <16 x float>* %tmp to i8*
+	%s2 = bitcast <16 x float>* %tmp2 to i8*
+	call void @llvm.memcpy.i64(i8* %s2, i8* %s, i64 64, i32 16)
+
+	%R = load <16 x float>* %tmp2
+	ret <16 x float> %R
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+