diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 92d1e2036b7..67ee70035af 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -178,6 +178,14 @@ bool SROA::performPromotion(Function &F) {
   return Changed;
 }
 
+/// getNumSAElements - Return the number of elements in the specific struct or
+/// array.
+static uint64_t getNumSAElements(const Type *T) {
+  if (const StructType *ST = dyn_cast<StructType>(T))
+    return ST->getNumElements();
+  return cast<ArrayType>(T)->getNumElements();
+}
+
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
 // which runs on all of the malloc/alloca instructions in the function, removing
 // them if they are only used by getelementptr instructions.
@@ -224,7 +232,10 @@ bool SROA::performScalarRepl(Function &F) {
         (isa<StructType>(AI->getAllocatedType()) ||
          isa<ArrayType>(AI->getAllocatedType())) &&
         AI->getAllocatedType()->isSized() &&
-        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold) {
+        // Do not promote any struct whose size is larger than "128" bytes.
+        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold &&
+        // Do not promote any struct into more than "32" separate vars.
+        getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) {
       // Check that all of the users of the allocation are capable of being
       // transformed.
       switch (isSafeAllocaToScalarRepl(AI)) {
@@ -672,11 +683,9 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
     // If this is a memcpy/memmove, emit a GEP of the other element address.
     Value *OtherElt = 0;
     if (OtherPtr) {
-      Value *Idx[2];
-      Idx[0] = Zero;
-      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+      Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
       OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
-                                         OtherPtr->getNameStr()+"."+utostr(i),
+                                           OtherPtr->getNameStr()+"."+utostr(i),
                                            MI);
     }
 
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
new file mode 100644
index 00000000000..e90dc024d7a
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {call.*mem}
+; PR2369
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @memtest1(i8* %dst, i8* %src) nounwind {
+entry:
+  %temp = alloca [100 x i8]    ; <[100 x i8]*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32    ; <i32> [#uses=0]
+  %temp1 = bitcast [100 x i8]* %temp to i8*    ; <i8*> [#uses=1]
+  call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 100, i32 1 )
+  %temp3 = bitcast [100 x i8]* %temp to i8*    ; <i8*> [#uses=1]
+  call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 100, i32 1 )
+  ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind