diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 8178c270759..5f8ab517df9 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -2481,19 +2481,22 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
     }
 
     if (CallSite CS = U) {
-      // If this is a readonly/readnone call site, then we know it is just a
-      // load and we can ignore it.
-      if (CS.onlyReadsMemory())
-        continue;
-
       // If this is the function being called then we treat it like a load and
       // ignore it.
       if (CS.isCallee(UI))
         continue;
 
+      // If this is a readonly/readnone call site, then we know it is just a
+      // load (but one that potentially returns the value itself), so we can
+      // ignore it if we know that the value isn't captured.
+      unsigned ArgNo = CS.getArgumentNo(UI);
+      if (CS.onlyReadsMemory() &&
+          (CS.getInstruction()->use_empty() ||
+           CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
+        continue;
+
       // If this is being passed as a byval argument, the caller is making a
       // copy, so it is only a read of the alloca.
-      unsigned ArgNo = CS.getArgumentNo(UI);
       if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
         continue;
     }
diff --git a/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll b/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
new file mode 100644
index 00000000000..816cb60e82a
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; PR9820
+; The readnone callee @noop returns its pointer argument, so the alloca is
+; still written through the call's result (the store to %call below). The
+; memcpy from the constant global must therefore be kept.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@func_1.l_10 = internal unnamed_addr constant [4 x i32] [i32 1, i32 0, i32 0, i32 0], align 16
+
+define i32* @noop(i32* %p_29) nounwind readnone {
+entry:
+  ret i32* %p_29
+}
+
+define i32 @main() nounwind {
+entry:
+  %l_10 = alloca [4 x i32], align 16
+  %tmp = bitcast [4 x i32]* %l_10 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false)
+; CHECK: call void @llvm.memcpy
+  %arrayidx = getelementptr inbounds [4 x i32]* %l_10, i64 0, i64 0
+  %call = call i32* @noop(i32* %arrayidx)
+  store i32 0, i32* %call
+  ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind