diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 93830b1f51e..0164cfc8592 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -287,7 +287,8 @@ bool SROA::performScalarRepl(Function &F) {
     const Type *VectorTy = 0;
     bool HadAVector = false;
     if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
-                           0, unsigned(AllocaSize)) && IsNotTrivial) {
+                           0, unsigned(AllocaSize)) && IsNotTrivial &&
+        AllocaSize <= 128) {
       AllocaInst *NewAI;
       // If we were able to find a vector type that can handle this with
       // insert/extract elements, and if there was at least one use that had
@@ -721,8 +722,9 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                         SmallVector<AllocaInst*, 32> &NewElts) {
 
   // If this is a memcpy/memmove, construct the other pointer as the
-  // appropriate type.
+  // appropriate type.  The "Other" pointer is the pointer that goes to memory
   Value *OtherPtr = 0;
+  unsigned MemAlignment = MI->getAlignment()->getZExtValue();
   if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(MI)) {
     if (BCInst == MCI->getRawDest())
       OtherPtr = MCI->getRawSource();
@@ -771,22 +773,47 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
     // If this is a memcpy/memmove, emit a GEP of the other element address.
     Value *OtherElt = 0;
+    unsigned OtherEltAlign = MemAlignment;
+
     if (OtherPtr) {
       Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
       OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
                                            OtherPtr->getNameStr()+"."+utostr(i),
                                            MI);
+      uint64_t EltOffset;
+      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+      if (const StructType *ST =
+            dyn_cast<StructType>(OtherPtrTy->getElementType())) {
+        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+      } else {
+        const Type *EltTy =
+          cast<SequentialType>(OtherPtr->getType())->getElementType();
+        EltOffset = TD->getTypePaddedSize(EltTy)*i;
+      }
+
+      // The alignment of the other pointer is the guaranteed alignment of the
+      // element, which is affected by both the known alignment of the whole
+      // mem intrinsic and the alignment of the element.  If the alignment of
+      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
+      // known alignment is just 4 bytes.
+      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
     }
 
     Value *EltPtr = NewElts[i];
-    const Type *EltTy =cast<PointerType>(EltPtr->getType())->getElementType();
+    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
 
     // If we got down to a scalar, insert a load or store as appropriate.
     if (EltTy->isSingleValueType()) {
       if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
-        Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp",
-                                  MI);
-        new StoreInst(Elt, SROADest ? EltPtr : OtherElt, MI);
+        if (SROADest) {
+          // From Other to Alloca.
+          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
+          new StoreInst(Elt, EltPtr, MI);
+        } else {
+          // From Alloca to Other.
+          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
+          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
+        }
         continue;
       }
       assert(isa<MemSetInst>(MI));
@@ -852,7 +879,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
         SROADest ? EltPtr : OtherElt,  // Dest ptr
         SROADest ? OtherElt : EltPtr,  // Src ptr
         ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
-        Zero  // Align
+        ConstantInt::get(Type::Int32Ty, OtherEltAlign) // Align
       };
       CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
     } else {
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
new file mode 100644
index 00000000000..4fc491b2979
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -0,0 +1,19 @@
+; The store into %p should end up with a known alignment of 1, since the memcpy
+; is only known to access it with 1-byte alignment.
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {store i16 1, .*, align 1}
+; PR3720
+
+        %struct.st = type { i16 }
+
+define void @f(i8* %p) nounwind {
+entry:
+        %s = alloca %struct.st, align 4                 ; <%struct.st*> [#uses=2]
+        %0 = getelementptr %struct.st* %s, i32 0, i32 0 ; <i16*> [#uses=1]
+        store i16 1, i16* %0, align 4
+        %s1 = bitcast %struct.st* %s to i8*             ; <i8*> [#uses=1]
+        call void @llvm.memcpy.i32(i8* %p, i8* %s1, i32 2, i32 1)
+        ret void
+}
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+
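
For context on the new test, here is a rough sketch (illustrative only, not part of the patch) of what -scalarrepl should now produce for @f: the memcpy is rewritten into a per-element store through %p whose alignment is MinAlign(1, 0) = 1, and promotion of the scalarized alloca forwards the constant 1 into that store, which is exactly what the RUN line's grep checks. The value names below are made up; only the final store pattern is verified by the test.

        %struct.st = type { i16 }

    define void @f(i8* %p) nounwind {
    entry:
            ; %p recast to the alloca's type (the "Other" pointer), then indexed to element 0
            %p.st = bitcast i8* %p to %struct.st*
            %p.st.0 = getelementptr %struct.st* %p.st, i32 0, i32 0
            ; alignment clamped to the memcpy's 1-byte guarantee, not the alloca's align 4
            store i16 1, i16* %p.st.0, align 1
            ret void
    }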