diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index bc428802e89..182fd3cca43 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -264,23 +264,31 @@ class ConvertToScalarInfo {
   /// large integers unless there is some potential for optimization.
   bool HadNonMemTransferAccess;

+  /// HadDynamicAccess - True if some element of this alloca was dynamic.
+  /// We don't yet have support for turning a dynamic access into a large
+  /// integer.
+  bool HadDynamicAccess;
+
 public:
   explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
     : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
-      VectorTy(0), HadNonMemTransferAccess(false) { }
+      VectorTy(0), HadNonMemTransferAccess(false), HadDynamicAccess(false) { }

   AllocaInst *TryConvert(AllocaInst *AI);

 private:
-  bool CanConvertToScalar(Value *V, uint64_t Offset);
+  bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
   void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
   bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
-  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
+                           Value *NonConstantIdx);

   Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
-                                    uint64_t Offset, IRBuilder<> &Builder);
+                                    uint64_t Offset, Value* NonConstantIdx,
+                                    IRBuilder<> &Builder);
   Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
-                                   uint64_t Offset, IRBuilder<> &Builder);
+                                   uint64_t Offset, Value* NonConstantIdx,
+                                   IRBuilder<> &Builder);
 };
 } // end anonymous namespace.

@@ -291,7 +299,7 @@ private:
 AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
   // If we can't convert this scalar, or if mem2reg can trivially do it, bail
   // out.
-  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+  if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
     return 0;

   // If an alloca has only memset / memcpy uses, it may still have an Unknown
@@ -319,13 +327,18 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
     if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
         !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
       return 0;
+    // Dynamic accesses on integers aren't yet supported. They need us to shift
+    // by a dynamic amount which could be difficult to work out as we might not
+    // know whether to use a left or right shift.
+    if (ScalarKind == Integer && HadDynamicAccess)
+      return 0;

     DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
     // Create and insert the integer alloca.
     NewTy = IntegerType::get(AI->getContext(), BitWidth);
   }
   AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
-  ConvertUsesToScalar(AI, NewAI, 0);
+  ConvertUsesToScalar(AI, NewAI, 0, 0);
   return NewAI;
 }

@@ -412,7 +425,8 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
 ///
 /// If we see at least one access to the value that is as a vector type, set the
 /// SawVec flag.
-bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
+                                             Value* NonConstantIdx) {
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);

@@ -442,24 +456,35 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
       if (!onlyUsedByLifetimeMarkers(BCI))
         IsNotTrivial = true;  // Can't be mem2reg'd.
-      if (!CanConvertToScalar(BCI, Offset))
+      if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
         return false;
       continue;
     }

     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
       // If this is a GEP with a variable indices, we can't handle it.
-      if (!GEP->hasAllConstantIndices())
+      PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
+      if (!PtrTy)
         return false;

       // Compute the offset that this GEP adds to the pointer.
       SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
-      if (!GEP->getPointerOperandType()->isPointerTy())
-        return false;
-      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+      Value *GEPNonConstantIdx = 0;
+      if (!GEP->hasAllConstantIndices()) {
+        if (!isa<VectorType>(PtrTy->getElementType()))
+          return false;
+        if (NonConstantIdx)
+          return false;
+        GEPNonConstantIdx = Indices.pop_back_val();
+        if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
+          return false;
+        HadDynamicAccess = true;
+      } else
+        GEPNonConstantIdx = NonConstantIdx;
+      uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
                                                Indices);
       // See if all uses can be converted.
-      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+      if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
         return false;
       IsNotTrivial = true;  // Can't be mem2reg'd.
       HadNonMemTransferAccess = true;
@@ -469,6 +494,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
     // If this is a constant sized memset of a constant value (e.g. 0) we can
     // handle it.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+      // Store to dynamic index.
+      if (NonConstantIdx)
+        return false;
       // Store of constant value.
       if (!isa<ConstantInt>(MSI->getValue()))
         return false;
@@ -493,6 +521,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
     // If this is a memcpy or memmove into or out of the whole allocation, we
     // can handle it like a load or store of the scalar type.
     if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+      // Store to dynamic index.
+      if (NonConstantIdx)
+        return false;
       ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
       if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
         return false;
@@ -524,12 +555,13 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
 /// Offset is an offset from the original alloca, in bits that need to be
 /// shifted to the right.  By the end of this, there should be no uses of Ptr.
 void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
-                                              uint64_t Offset) {
+                                              uint64_t Offset,
+                                              Value* NonConstantIdx) {
   while (!Ptr->use_empty()) {
     Instruction *User = cast<Instruction>(Ptr->use_back());

     if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
-      ConvertUsesToScalar(CI, NewAI, Offset);
+      ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
       CI->eraseFromParent();
       continue;
     }

@@ -537,9 +569,11 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
       // Compute the offset that this GEP adds to the pointer.
       SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+      if (!GEP->hasAllConstantIndices())
+        NonConstantIdx = Indices.pop_back_val();
       uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                                Indices);
-      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
       GEP->eraseFromParent();
       continue;
     }

@@ -550,7 +584,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       // The load is a bit extract from NewAI shifted right by Offset bits.
       Value *LoadedVal = Builder.CreateLoad(NewAI);
       Value *NewLoadVal
-        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
+                                     NonConstantIdx, Builder);
       LI->replaceAllUsesWith(NewLoadVal);
       LI->eraseFromParent();
       continue;
@@ -560,7 +595,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       assert(SI->getOperand(0) != Ptr && "Consistency error!");
       Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
       Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
-                                             Builder);
+                                             NonConstantIdx, Builder);
       Builder.CreateStore(New, NewAI);
       SI->eraseFromParent();

@@ -575,6 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
     // transform it into a store of the expanded constant value.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
       assert(MSI->getRawDest() == Ptr && "Consistency error!");
+      assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
       int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
       if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
         unsigned NumBytes = static_cast<unsigned>(SNumBytes);
@@ -591,7 +627,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
         Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
         Value *New = ConvertScalar_InsertValue(
                                     ConstantInt::get(User->getContext(), APVal),
-                                               Old, Offset, Builder);
+                                               Old, Offset, 0, Builder);
         Builder.CreateStore(New, NewAI);

         // If the load we just inserted is now dead, then the memset overwrote
@@ -607,6 +643,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
     // can handle it like a load or store of the scalar type.
     if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
       assert(Offset == 0 && "must be store to start of alloca");
+      assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");

       // If the source and destination are both to the same alloca, then this is
       // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
@@ -679,7 +716,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
 /// shifted to the right.
 Value *ConvertToScalarInfo::
 ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
-                           uint64_t Offset, IRBuilder<> &Builder) {
+                           uint64_t Offset, Value* NonConstantIdx,
+                           IRBuilder<> &Builder) {
   // If the load is of the whole new alloca, no conversion is needed.
   Type *FromType = FromVal->getType();
   if (FromType == ToType && Offset == 0)
@@ -701,7 +739,17 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
       assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
     }
     // Return the element extracted out of it.
-    Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
+    Value *Idx;
+    if (NonConstantIdx) {
+      if (Elt)
+        Idx = Builder.CreateAdd(NonConstantIdx,
+                                Builder.getInt32(Elt),
+                                "dyn.offset");
+      else
+        Idx = NonConstantIdx;
+    } else
+      Idx = Builder.getInt32(Elt);
+    Value *V = Builder.CreateExtractElement(FromVal, Idx);
     if (V->getType() != ToType)
       V = Builder.CreateBitCast(V, ToType);
     return V;
@@ -710,23 +758,27 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
   // If ToType is a first class aggregate, extract out each of the pieces and
   // use insertvalue's to form the FCA.
   if (StructType *ST = dyn_cast<StructType>(ToType)) {
+    assert(!NonConstantIdx &&
+           "Dynamic indexing into struct types not supported");
     const StructLayout &Layout = *TD.getStructLayout(ST);
     Value *Res = UndefValue::get(ST);
     for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
       Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                         Offset+Layout.getElementOffsetInBits(i),
-                                              Builder);
+                                              0, Builder);
       Res = Builder.CreateInsertValue(Res, Elt, i);
     }
     return Res;
   }

   if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+    assert(!NonConstantIdx &&
+           "Dynamic indexing into array types not supported");
     uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
     Value *Res = UndefValue::get(AT);
     for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
       Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
-                                              Offset+i*EltSize, Builder);
+                                              Offset+i*EltSize, 0, Builder);
       Res = Builder.CreateInsertValue(Res, Elt, i);
     }
     return Res;
@@ -792,9 +844,14 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
 ///
 /// Offset is an offset from the original alloca, in bits that need to be
 /// shifted to the right.
+///
+/// NonConstantIdx is an index value if there was a GEP with a non-constant
+/// index value.  If this is 0 then all GEPs used to find this insert address
+/// are constant.
 Value *ConvertToScalarInfo::
 ConvertScalar_InsertValue(Value *SV, Value *Old,
-                          uint64_t Offset, IRBuilder<> &Builder) {
+                          uint64_t Offset, Value* NonConstantIdx,
+                          IRBuilder<> &Builder) {
   // Convert the stored type to the actual type, shift it left to insert
   // then 'or' into place.
   Type *AllocaType = Old->getType();
@@ -815,26 +872,40 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
       SV = Builder.CreateBitCast(SV, EltTy);
     uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
     unsigned Elt = Offset/EltSize;
-    return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
+    Value *Idx;
+    if (NonConstantIdx) {
+      if (Elt)
+        Idx = Builder.CreateAdd(NonConstantIdx,
+                                Builder.getInt32(Elt),
+                                "dyn.offset");
+      else
+        Idx = NonConstantIdx;
+    } else
+      Idx = Builder.getInt32(Elt);
+    return Builder.CreateInsertElement(Old, SV, Idx);
   }

   // If SV is a first-class aggregate value, insert each value recursively.
   if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
+    assert(!NonConstantIdx &&
+           "Dynamic indexing into struct types not supported");
     const StructLayout &Layout = *TD.getStructLayout(ST);
     for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
       Value *Elt = Builder.CreateExtractValue(SV, i);
       Old = ConvertScalar_InsertValue(Elt, Old,
                                       Offset+Layout.getElementOffsetInBits(i),
-                                      Builder);
+                                      0, Builder);
     }
     return Old;
   }

   if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+    assert(!NonConstantIdx &&
+           "Dynamic indexing into array types not supported");
     uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
     for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
       Value *Elt = Builder.CreateExtractValue(SV, i);
-      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
     }
     return Old;
   }
diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
index af7f4398d10..565cd761642 100644
--- a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
@@ -4,12 +4,14 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "x86_64-apple-darwin10.0.0"

 ; CHECK: @test1
-; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
+; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
+; CHECK: memset
+; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

 ; Split the array but don't replace the memset with an insert
 ; element as its not a constant offset.
+; The load, however, can be replaced with an extract element.
 define float @test1(i32 %idx1, i32 %idx2) {
 entry:
   %0 = alloca [4 x <4 x float>]
@@ -23,13 +25,8 @@ entry:
 }

 ; CHECK: @test2
-; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
-; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx1
-; CHECK: store float 1.000000e+00, float* %ptr1
-; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx2
-; CHECK: %ret = load float* %ptr2
-; CHECK: ret float %ret
+; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2

 ; Do SROA on the array when it has dynamic vector reads and writes.
 define float @test2(i32 %idx1, i32 %idx2) {
@@ -61,13 +58,34 @@ entry:
   ret float %ret
 }

-; CHECK: @test4
+; CHECK: test4
+; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <16 x float> %0, i32 %idx2
+
+; Don't do SROA on a dynamically indexed vector when it spans
+; more than one array element of the alloca array it is within.
+; However, unlike test3, the store is on the vector type
+; so SROA will convert the large alloca into the large vector
+; type and do all accesses with insert/extract element
+define float @test4(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca [4 x <4 x float>]
+  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
+  store <16 x float> zeroinitializer, <16 x float>* %bigvec
+  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
+  store float 1.0, float* %ptr1
+  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
+  %ret = load float* %ptr2
+  ret float %ret
+}
+
+; CHECK: @test5
 ; CHECK: %0 = alloca [4 x <4 x float>]
 ; CHECK-NOT: alloca

 ; Don't do SROA as the is a second dynamically indexed array
 ; which may span multiple elements of the alloca.
-define float @test4(i32 %idx1, i32 %idx2) {
+define float @test5(i32 %idx1, i32 %idx2) {
 entry:
   %0 = alloca [4 x <4 x float>]
   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
@@ -80,15 +98,9 @@ entry:
   ret float %ret
 }

-; CHECK: test5
-; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]]
-; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1
-; CHECK: store float 1.000000e+00, float* %ptr1
-; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2
-; CHECK: %ret = load float* %ptr2
+; CHECK: test6
+; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

 %vector.pair = type { %vector.anon, %vector.anon }
 %vector.anon = type { %vector }
@@ -99,7 +111,7 @@ entry:
 ; the original GEP, just the indices it needs to get to the correct offset of
 ; some type, not necessarily the dynamic vector.
 ; This test makes sure we don't have this crash.
-define float @test5(i32 %idx1, i32 %idx2) {
+define float @test6(i32 %idx1, i32 %idx2) {
 entry:
   %0 = alloca %vector.pair
   store %vector.pair zeroinitializer, %vector.pair* %0
@@ -110,21 +122,15 @@ entry:
   ret float %ret
 }

-; CHECK: test6
-; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]]
-; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1
-; CHECK: store float 1.000000e+00, float* %ptr1
-; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2
-; CHECK: %ret = load float* %ptr2
+; CHECK: test7
+; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
+; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

 %array.pair = type { [2 x %array.anon], %array.anon }
 %array.anon = type { [2 x %vector] }

-; This is the same as test5 and tests the same crash, but on arrays.
-define float @test6(i32 %idx1, i32 %idx2) {
+; This is the same as test6 and tests the same crash, but on arrays.
+define float @test7(i32 %idx1, i32 %idx2) {
 entry:
   %0 = alloca %array.pair
   store %array.pair zeroinitializer, %array.pair* %0
@@ -135,4 +141,27 @@ entry:
   ret float %ret
 }
+
+; CHECK: test8
+; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
+; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
+; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
+; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
+
+; Do SROA on the vector when it has dynamic vector reads and writes
+; from a non-zero offset.
+define float @test8(i32 %idx1, i32 %idx2) {
+entry:
+  %0 = alloca <4 x float>
+  store <4 x float> zeroinitializer, <4 x float>* %0
+  %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
+  %ptr2 = bitcast float* %ptr1 to <3 x float>*
+  %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
+  store float 1.0, float* %ptr3
+  %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
+  %ptr5 = bitcast float* %ptr4 to <2 x float>*
+  %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
+  %ret = load float* %ptr6
+  ret float %ret
+}
+
 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
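
For reference, a minimal sketch of the rewrite this patch enables, using a hypothetical function @sketch (not part of the test file; the expected output follows the pattern checked by test8 above). A dynamic read at a non-zero constant offset is promoted by folding the constant element offset into the dynamic index with an add named "dyn.offset":

; Input: a dynamic read one element into a <4 x float> alloca.
define float @sketch(i32 %idx) {
entry:
  %v = alloca <4 x float>
  store <4 x float> zeroinitializer, <4 x float>* %v
  %base = getelementptr <4 x float>* %v, i32 0, i32 1
  %sub = bitcast float* %base to <3 x float>*
  %ptr = getelementptr <3 x float>* %sub, i32 0, i32 %idx
  %ret = load float* %ptr
  ret float %ret
}

; Expected after -scalarrepl (per the test8 CHECK lines): the alloca is
; promoted and the element index becomes %idx plus the constant offset 1.
;   %dyn.offset = add i32 %idx, 1
;   %ret = extractelement <4 x float> zeroinitializer, i32 %dyn.offset
;   ret float %ret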