SROA: preserve the address space of the other mem-intrinsic operand (PR7437).

When RewriteMemIntrinUserOfAlloca rewrites a mem intrinsic that uses an
alloca, it now:
  * records the address space of OtherPtr before stripping its casts,
  * strips casts with Value::stripPointerCasts() instead of the hand-written
    bitcast / all-zero-GEP loop and the separate ConstantExpr bitcast check,
  * bitcasts OtherPtr to a pointer to the alloca's element type in the
    recorded address space, instead of to AI->getType().
A regression test (test/Transforms/ScalarRepl/address-space.ll) is added.

diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 506e9823c0e..dd445f63320 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1272,6 +1272,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
   // If there is an other pointer, we want to convert it to the same pointer
   // type as AI has, so we can GEP through it safely.
   if (OtherPtr) {
+    unsigned AddrSpace =
+      cast<PointerType>(OtherPtr->getType())->getAddressSpace();
 
     // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
     // optimization, but it's also required to detect the corner case where
@@ -1279,20 +1281,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
     // OtherPtr may be a bitcast or GEP that currently being rewritten. (This
     // function is only called for mem intrinsics that access the whole
     // aggregate, so non-zero GEPs are not an issue here.)
-    while (1) {
-      if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
-        OtherPtr = BC->getOperand(0);
-        continue;
-      }
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
-        // All zero GEPs are effectively bitcasts.
-        if (GEP->hasAllZeroIndices()) {
-          OtherPtr = GEP->getOperand(0);
-          continue;
-        }
-      }
-      break;
-    }
+    OtherPtr = OtherPtr->stripPointerCasts();
+
     // Copying the alloca to itself is a no-op: just delete it.
     if (OtherPtr == AI || OtherPtr == NewElts[0]) {
       // This code will run twice for a no-op memcpy -- once for each operand.
@@ -1304,15 +1294,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       return;
     }
 
-    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
-      if (BCE->getOpcode() == Instruction::BitCast)
-        OtherPtr = BCE->getOperand(0);
-
     // If the pointer is not the right type, insert a bitcast to the right
     // type.
-    if (OtherPtr->getType() != AI->getType())
-      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
-                                 MI);
+    const Type *NewTy =
+      PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+    if (OtherPtr->getType() != NewTy)
+      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
   }
 
   // Process each element of the aggregate.
diff --git a/test/Transforms/ScalarRepl/address-space.ll b/test/Transforms/ScalarRepl/address-space.ll
new file mode 100644
index 00000000000..318d4e75906
--- /dev/null
+++ b/test/Transforms/ScalarRepl/address-space.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -scalarrepl < %s | FileCheck %s
+; PR7437 - Make sure SROA preserves address space of memcpy when
+; hacking on it.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10"
+
+%struct.anon = type { [1 x float] }
+
+; CHECK: define void @Test(
+; CHECK: load float addrspace(2)*
+; CHECK-NEXT: fsub float
+; CHECK: store float {{.*}}, float addrspace(2)*
+define void @Test(%struct.anon addrspace(2)* %pPtr) nounwind {
+entry:
+  %s = alloca %struct.anon, align 4 ; <%struct.anon*> [#uses=3]
+  %arrayidx = getelementptr inbounds %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
+  %tmp1 = bitcast %struct.anon* %s to i8* ; <i8*> [#uses=1]
+  %tmp2 = bitcast %struct.anon addrspace(2)* %arrayidx to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
+  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i32 4, i1 false)
+  %tmp3 = getelementptr inbounds %struct.anon* %s, i32 0, i32 0 ; <[1 x float]*> [#uses=1]
+  %arrayidx4 = getelementptr inbounds [1 x float]* %tmp3, i32 0, i64 0 ; <float*> [#uses=2]
+  %tmp5 = load float* %arrayidx4 ; <float> [#uses=1]
+  %sub = fsub float %tmp5, 5.000000e+00 ; <float> [#uses=1]
+  store float %sub, float* %arrayidx4
+  %arrayidx7 = getelementptr inbounds %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
+  %tmp8 = bitcast %struct.anon addrspace(2)* %arrayidx7 to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
+  %tmp9 = bitcast %struct.anon* %s to i8* ; <i8*> [#uses=1]
+  call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i32 4, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
+
+declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i32, i1) nounwind
+