diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 26317ccafb6..6b79695d6f3 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -82,6 +82,10 @@ namespace {
     /// The alloca to promote.
     AllocaInst *AI;
 
+    /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+    /// looping and avoid redundant work.
+    SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
     /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
     bool isUnsafe : 1;
 
@@ -116,10 +120,12 @@ namespace {
     bool isSafeAllocaToScalarRepl(AllocaInst *AI);
     void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+    void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+                                         AllocaInfo &Info);
     void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
     void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                          const Type *MemOpType, bool isStore, AllocaInfo &Info,
-                         Instruction *TheAccess);
+                         Instruction *TheAccess, bool AllowWholeAccess);
     bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
     uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
                                   const Type *&IdxTy);
@@ -1086,13 +1092,14 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
       if (Length == 0)
         return MarkUnsafe(Info, User);
       isSafeMemAccess(Offset, Length->getZExtValue(), 0,
-                      UI.getOperandNo() == 0, Info, MI);
+                      UI.getOperandNo() == 0, Info, MI,
+                      true /*AllowWholeAccess*/);
     } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       if (LI->isVolatile())
         return MarkUnsafe(Info, User);
       const Type *LIType = LI->getType();
       isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
-                      LIType, false, Info, LI);
+                      LIType, false, Info, LI, true /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
 
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -1102,8 +1109,65 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
 
       const Type *SIType = SI->getOperand(0)->getType();
       isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
-                      SIType, true, Info, SI);
+                      SIType, true, Info, SI, true /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
+    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+      isSafePHISelectUseForScalarRepl(User, Offset, Info);
+    } else {
+      return MarkUnsafe(Info, User);
+    }
+    if (Info.isUnsafe) return;
+  }
+}
+
+
+/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
+/// pointer derived from the alloca, we can often still split the alloca into
+/// elements. This is useful if we have a large alloca where one element is
+/// phi'd together somewhere: we can SRoA and promote all the other elements
+/// even if we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single loads and stores
+/// that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+                                           AllocaInfo &Info) {
+  // If we've already checked this PHI, don't do it again.
+  if (PHINode *PN = dyn_cast<PHINode>(I))
+    if (!Info.CheckedPHIs.insert(PN))
+      return;
+
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+      // Only allow "bitcast" GEPs for simplicity.  We could generalize this,
+      // but would have to prove that we're staying inside of an element being
+      // promoted.
+      if (!GEPI->hasAllZeroIndices())
+        return MarkUnsafe(Info, User);
+      isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      if (LI->isVolatile())
+        return MarkUnsafe(Info, User);
+      const Type *LIType = LI->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+                      LIType, false, Info, LI, false /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      // Store is ok if storing INTO the pointer, not storing the pointer
+      if (SI->isVolatile() || SI->getOperand(0) == I)
+        return MarkUnsafe(Info, User);
+
+      const Type *SIType = SI->getOperand(0)->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+                      SIType, true, Info, SI, false /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+      isSafePHISelectUseForScalarRepl(User, Offset, Info);
     } else {
       return MarkUnsafe(Info, User);
     }
     if (Info.isUnsafe) return;
   }
 }
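The shape this routine accepts is essentially @test2 in the new test file below (a hand-written sketch, not part of the patch; %A, %B, %C, %X, %Q are invented names): one element pointer of a pair alloca is merged by a PHI, and every use of the PHI stays inside a single element, so the aggregate can still be split:

    ; %B and %C point at the two i32 elements of a {i32, i32} alloca %A
    F:
      %X = phi i32* [ %B, %entry ], [ %C, %T ]
      %Q = load i32* %X        ; touches one i32 element, never the whole pair

The CheckedPHIs set is what keeps this use-walk terminating: two loop-carried pointer PHIs that appear in each other's incoming lists would otherwise make isSafePHISelectUseForScalarRepl recurse between them forever.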
@@ -1187,11 +1251,15 @@ static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
 /// alloca or has an offset and size that corresponds to a component element
 /// within it. The offset checked here may have been formed from a GEP with a
 /// pointer bitcasted to a different type.
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
 void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                            const Type *MemOpType, bool isStore,
-                           AllocaInfo &Info, Instruction *TheAccess) {
+                           AllocaInfo &Info, Instruction *TheAccess,
+                           bool AllowWholeAccess) {
   // Check if this is a load/store of the entire alloca.
-  if (Offset == 0 &&
+  if (Offset == 0 && AllowWholeAccess &&
       MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
     // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
     // loads/stores (which are essentially the same as the MemIntrinsics with
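To illustrate the new flag on a hypothetical {i32, i32} alloca %A (all value names invented): the first load below is a whole-alloca access and is only accepted when AllowWholeAccess is true, while the second stays within one element and is always a candidate:

    %W = bitcast {i32, i32}* %A to i64*
    %v = load i64* %W                                 ; whole-alloca access
    %E = getelementptr {i32, i32}* %A, i32 0, i32 1
    %x = load i32* %E                                 ; single-element access

Accesses reached through a PHI or select pass false here, because after splitting, the PHI can only be redirected to one of the new element allocas, never to the whole aggregate.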
@@ -1257,14 +1325,21 @@ bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
 /// instruction.
 void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                                 SmallVector<AllocaInst*, 32> &NewElts) {
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
-    Instruction *User = cast<Instruction>(*UI);
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI++);
 
     if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
       RewriteBitCast(BC, AI, Offset, NewElts);
-    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+      continue;
+    }
+
+    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
       RewriteGEP(GEPI, AI, Offset, NewElts);
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+      continue;
+    }
+
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
       uint64_t MemSize = Length->getZExtValue();
       if (Offset == 0 &&
@@ -1272,7 +1347,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
       // Otherwise the intrinsic can only touch a single element and the
       // address operand will be updated, so nothing else needs to be done.
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      continue;
+    }
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       const Type *LIType = LI->getType();
 
       if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
@@ -1297,7 +1375,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         // If this is a load of the entire alloca to an integer, rewrite it.
         RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
       }
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      continue;
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       Value *Val = SI->getOperand(0);
       const Type *SIType = Val->getType();
       if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
@@ -1320,6 +1401,26 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         // If this is a store of the entire alloca from an integer, rewrite it.
         RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
       }
+      continue;
+    }
+
+    if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+      // If we have a PHI user of the alloca itself (as opposed to a GEP or
+      // bitcast) we have to rewrite it.  GEP and bitcast uses will be RAUW'd to
+      // the new pointer.
+      if (!isa<AllocaInst>(I)) continue;
+
+      assert(Offset == 0 && NewElts[0] &&
+             "Direct alloca use should have a zero offset");
+
+      // If we have a use of the alloca, we know the derived uses will be
+      // utilizing just the first element of the scalarized result.  Insert a
+      // bitcast of the first alloca before the user as required.
+      AllocaInst *NewAI = NewElts[0];
+      BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+      NewAI->moveBefore(BCI);
+      TheUse = BCI;
+      continue;
     }
   }
 }
@@ -1859,6 +1960,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
       return false;
     }
   }
+
   return true;
 }
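The effect of the new PHI/select arm, sketched by hand on the shape of @test1 below (the %a.cast/%b.cast names are invented; real output would use numbered temporaries): each direct alloca operand of the select is redirected to a bitcast of the first element's replacement alloca, which the Offset == 0 assertion guarantees is the right stand-in:

    ; before
    %a = alloca %struct.X
    %b = alloca %struct.X
    %p.0 = select i1 %2, %struct.X* %b, %struct.X* %a

    ; after: each alloca is replaced by its lone element, bitcast back
    %a.0 = alloca i32
    %a.cast = bitcast i32* %a.0 to %struct.X*
    %b.0 = alloca i32
    %b.cast = bitcast i32* %b.0 to %struct.X*
    %p.0 = select i1 %2, %struct.X* %b.cast, %struct.X* %a.cast

Note the insertion order in the code: the bitcast is created right before the new element alloca, and the alloca is then moved above it so that the cast's operand dominates it.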
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
new file mode 100644
index 00000000000..06ee2081513
--- /dev/null
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -0,0 +1,78 @@
+; RUN: opt %s -scalarrepl -S | FileCheck %s
+; Test promotion of allocas that have phis and select users.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+%struct.X = type { i32 }
+%PairTy = type {i32, i32}
+
+; CHECK: @test1
+; CHECK: %a.0 = alloca i32
+; CHECK: %b.0 = alloca i32
+define i32 @test1(i32 %x) nounwind readnone ssp {
+entry:
+  %a = alloca %struct.X, align 8                  ; <%struct.X*> [#uses=2]
+  %b = alloca %struct.X, align 8                  ; <%struct.X*> [#uses=2]
+  %0 = getelementptr inbounds %struct.X* %a, i64 0, i32 0 ; <i32*> [#uses=1]
+  store i32 1, i32* %0, align 8
+  %1 = getelementptr inbounds %struct.X* %b, i64 0, i32 0 ; <i32*> [#uses=1]
+  store i32 2, i32* %1, align 8
+  %2 = icmp eq i32 %x, 0                          ; <i1> [#uses=1]
+  %p.0 = select i1 %2, %struct.X* %b, %struct.X* %a ; <%struct.X*> [#uses=1]
+  %3 = getelementptr inbounds %struct.X* %p.0, i64 0, i32 0 ; <i32*> [#uses=1]
+  %4 = load i32* %3, align 8                      ; <i32> [#uses=1]
+  ret i32 %4
+}
+
+; CHECK: @test2
+; CHECK: %A.0 = alloca i32
+; CHECK: %A.1 = alloca i32
+define i32 @test2(i1 %c) {
+entry:
+  %A = alloca {i32, i32}
+  %B = getelementptr {i32, i32}* %A, i32 0, i32 0
+  store i32 1, i32* %B
+  br i1 %c, label %T, label %F
+T:
+  %C = getelementptr {i32, i32}* %A, i32 0, i32 1
+  store i32 2, i32* %C
+  br label %F
+F:
+  %X = phi i32* [%B, %entry], [%C, %T]
+  %Q = load i32* %X
+  ret i32 %Q
+}
+
+; CHECK: @test3
+; CHECK: %A.0 = alloca i32
+; CHECK: %A.1 = alloca i32
+; rdar://8904039
+define i32 @test3(i1 %c) {
+entry:
+  %A = alloca {i32, i32}
+  %B = getelementptr {i32, i32}* %A, i32 0, i32 0
+  store i32 1, i32* %B
+  %C = getelementptr {i32, i32}* %A, i32 0, i32 1
+  store i32 2, i32* %C
+
+  %X = select i1 %c, i32* %B, i32* %C
+  %Q = load i32* %X
+  ret i32 %Q
+}
+
+;; We can't scalarize this, since a use of the select is not an element access.
+define i64 @test4(i1 %c) {
+entry:
+  %A = alloca %PairTy
+  ; CHECK: @test4
+  ; CHECK: %A = alloca %PairTy
+  %B = getelementptr {i32, i32}* %A, i32 0, i32 0
+  store i32 1, i32* %B
+  %C = getelementptr {i32, i32}* %A, i32 0, i32 1
+  store i32 2, i32* %C
+
+  %X = select i1 %c, i32* %B, i32* %C
+  %Y = bitcast i32* %X to i64*
+  %Q = load i64* %Y
+  ret i64 %Q
+}
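For reference, the rewrite that the @test2 CHECK lines anticipate looks roughly like this (hand-derived, not captured opt output): the pair is replaced by two scalar allocas and the PHI merges their addresses directly, so any element without PHI'd pointers would be independently promotable even while this one stays in memory:

    F:
      %X = phi i32* [ %A.0, %entry ], [ %A.1, %T ]
      %Q = load i32* %X
      ret i32 %Q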