Use byval parameter alignment when forwarding a memcpy into a byval
argument (PR9794).

ValueTracking: ComputeMaskedBits reports the low bits of a byval argument
that carries an explicit alignment as known zero.
MemCpyOpt: processByValArgument no longer gives up as soon as the memcpy is
less aligned than the byval parameter; it first asks
getOrEnforceKnownAlignment whether the memcpy source has, or can be given,
the required alignment.

diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 7b7e39f6c41..4f3dc7a7dce 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -131,8 +131,19 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     }
     return;
   }
 
-  KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything.
+  // Start out not knowing anything. Do this before the early return for
+  // arguments below so that every argument path reports cleared bits.
+  KnownZero.clearAllBits(); KnownOne.clearAllBits();
+
+  if (Argument *A = dyn_cast<Argument>(V)) {
+    // Get alignment information off byval arguments if specified in the IR.
+    if (A->hasByValAttr())
+      if (unsigned Align = A->getParamAlignment())
+        KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+                                                CountTrailingZeros_32(Align));
+    return;
+  }
 
   if (Depth == MaxDepth || Mask == 0)
     return; // Limit search depth.
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 04edfd43a28..360639ec95b 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/IRBuilder.h"
@@ -866,12 +867,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
     return false;
 
-  // Get the alignment of the byval. If it is greater than the memcpy, then we
-  // can't do the substitution. If the call doesn't specify the alignment, then
-  // it is some target specific value that we can't know.
+  // Get the alignment of the byval. If the call doesn't specify the alignment,
+  // then it is some target specific value that we can't know.
   unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
-  if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
-    return false;
+  if (ByValAlign == 0) return false;
+
+  // If the byval needs more alignment than the memcpy provides, try to force
+  // the source of the memcpy to the alignment we need. If we fail, bail out.
+  if (MDep->getAlignment() < ByValAlign &&
+      getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+    return false;
 
   // Verify that the copied-from memory doesn't change in between the memcpy and
   // the byval call.
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index b387d32a7d5..5c6a94ce5c9 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -109,3 +109,21 @@ define void @test6(i8 *%P) {
 ; CHECK-NEXT: ret void
 }
 
+
+; PR9794 - Should forward memcpy into byval argument even though the memcpy
+; isn't itself 8 byte aligned.
+%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define i32 @test7(%struct.p* nocapture byval align 8 %q) nounwind ssp {
+entry:
+  %agg.tmp = alloca %struct.p, align 4
+  %tmp = bitcast %struct.p* %agg.tmp to i8*
+  %tmp1 = bitcast %struct.p* %q to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
+  %call = call i32 @g(%struct.p* byval align 8 %agg.tmp) nounwind
+  ret i32 %call
+; CHECK: @test7
+; CHECK: call i32 @g(%struct.p* byval align 8 %q) nounwind
+}
+
+declare i32 @g(%struct.p* byval align 8)