diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 4838d856ae3..65a30f2816f 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -86,6 +86,9 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
     }
   }
 
+  PointerType *AddrTy = cast<PointerType>(V->getType());
+  uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
+
   // If we found a base allocated type from either an alloca or global variable,
   // try to see if we are definitively within the allocated region. We need to
   // know the size of the base type and the loaded type to do anything in this
@@ -96,8 +99,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
 
     if (Align <= BaseAlign) {
       // Check if the load is within the bounds of the underlying object.
-      PointerType *AddrTy = cast<PointerType>(V->getType());
-      uint64_t LoadSize = DL->getTypeStoreSize(AddrTy->getElementType());
       if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
           (Align == 0 || (ByteOffset % Align) == 0))
         return true;
@@ -111,6 +112,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
   // the load entirely).
   BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
 
+  // We can at least always strip pointer casts even though we can't use the
+  // base here.
+  V = V->stripPointerCasts();
+
   while (BBI != E) {
     --BBI;
 
@@ -120,13 +125,25 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
         !isa<DbgInfoIntrinsic>(BBI))
       return false;
-    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (AreEquivalentAddressValues(LI->getOperand(0), V))
-        return true;
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
-      if (AreEquivalentAddressValues(SI->getOperand(1), V))
-        return true;
-    }
+    Value *AccessedPtr;
+    if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+      AccessedPtr = LI->getPointerOperand();
+    else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+      AccessedPtr = SI->getPointerOperand();
+    else
+      continue;
+
+    // Handle trivial cases even w/o DataLayout or other work.
+    if (AccessedPtr == V)
+      return true;
+
+    if (!DL)
+      continue;
+
+    auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
+    if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
+        LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+      return true;
   }
 
   return false;
 }
@@ -157,12 +174,12 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
+  Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+
   // If we're using alias analysis to disambiguate get the size of *Ptr.
-  uint64_t AccessSize = 0;
-  if (AA) {
-    Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
-    AccessSize = AA->getTypeStoreSize(AccessTy);
-  }
+  uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0;
+
+  Value *StrippedPtr = Ptr->stripPointerCasts();
 
   while (ScanFrom != ScanBB->begin()) {
     // We must ignore debug info directives when counting (otherwise they
@@ -183,17 +200,21 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
     // (This is true even if the load is volatile or atomic, although
     // those cases are unlikely.)
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-      if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+      if (AreEquivalentAddressValues(
+              LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+          CastInst::isBitCastable(LI->getType(), AccessTy)) {
         if (AATags)
           LI->getAAMetadata(*AATags);
         return LI;
       }
 
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
       // If this is a store through Ptr, the value is available!
       // (This is true even if the store is volatile or atomic, although
       // those cases are unlikely.)
-      if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+      if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
+          CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) {
         if (AATags)
           SI->getAAMetadata(*AATags);
         return SI->getOperand(0);
@@ -202,15 +223,15 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
       // If Ptr is an alloca and this is a store to a different alloca, ignore
       // the store. This is a trivial form of alias analysis that is important
       // for reg2mem'd code.
-      if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
-          (isa<AllocaInst>(SI->getOperand(1)) ||
-           isa<GlobalVariable>(SI->getOperand(1))))
+      if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
+          (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)))
         continue;
 
       // If we have alias analysis and it says the store won't modify the loaded
       // value, ignore the store.
       if (AA &&
-          (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+          (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
+           AliasAnalysis::Mod) == 0)
         continue;
 
       // Otherwise the store that may or may not alias the pointer, bail out.
@@ -223,7 +244,8 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
     // If alias analysis claims that it really won't modify the load,
     // ignore it.
     if (AA &&
-        (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+        (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
+         AliasAnalysis::Mod) == 0)
       continue;
 
     // May modify the pointer, bail out.
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 8e13dde0854..32ac62e74c0 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -420,7 +420,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   // separated by a few arithmetic operations.
   BasicBlock::iterator BBI = &LI;
   if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
-    return ReplaceInstUsesWith(LI, AvailableVal);
+    return ReplaceInstUsesWith(
+        LI, Builder->CreateBitCast(AvailableVal, LI.getType()));
 
   // load(gep null, ...) -> unreachable
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 924d4e14677..fbb5c201347 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -901,6 +901,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
     // If the returned value is the load itself, replace with an undef. This can
     // only happen in dead loops.
     if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+    if (AvailableVal->getType() != LI->getType())
+      AvailableVal = CastInst::Create(CastInst::BitCast, AvailableVal,
+                                      LI->getType(), "", LI);
     LI->replaceAllUsesWith(AvailableVal);
     LI->eraseFromParent();
     return true;
@@ -1031,7 +1034,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
     assert(I != AvailablePreds.end() && I->first == P &&
            "Didn't find entry for predecessor!");
 
-    PN->addIncoming(I->second, I->first);
+    // If we have an available predecessor but it requires casting, insert the
+    // cast in the predecessor and use the cast.
+    Value *PredV = I->second;
+    if (PredV->getType() != LI->getType())
+      PredV = CastInst::Create(CastInst::BitCast, PredV, LI->getType(), "", P);
+
+    PN->addIncoming(PredV, I->first);
   }
 
   //cerr << "PRE: " << *LI << *PN << "\n";
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 05f65efc033..6cf9f0f6b4d 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1276,3 +1276,112 @@ define i32 @test77(i1 %flag, i32* %x) {
   %v = load i32* %p
   ret i32 %v
 }
+
+define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78(
+; CHECK: %[[V1:.*]] = load i32* %x
+; CHECK-NEXT: %[[V2:.*]] = load i32* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT: ret i32 %[[S]]
+entry:
+  store i32 0, i32* %x
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test79(
+; CHECK: %[[V1:.*]] = load float* %x
+; CHECK-NEXT: %[[V2:.*]] = load float* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
+; CHECK-NEXT: ret float %[[S]]
+entry:
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  store i32 0, i32* %x1
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test80(i1 %flag) {
+; Test that when we speculate the loads around the select they fold through
+; load->load folding and load->store folding.
+; CHECK-LABEL: @test80(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: ret i32 %[[V]]
+entry:
+  %x = alloca i32
+  %y = alloca i32
+  call void @scribble_on_memory(i32* %x)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load i32* %x
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test81(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test81(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: %[[C:.*]] = bitcast i32 %[[V]] to float
+; CHECK-NEXT: ret float %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_memory(i32* %x1)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load i32* %x1
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test82(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test82(
+; CHECK: %[[X:.*]] = alloca float
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK-NEXT: %[[X1:.*]] = bitcast float* %[[X]] to i32*
+; CHECK-NEXT: %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
+; CHECK: %[[V:.*]] = load float* %[[X]]
+; CHECK-NEXT: store float %[[V]], float* %[[Y1]]
+; CHECK-NEXT: %[[C:.*]] = bitcast float %[[V]] to i32
+; CHECK-NEXT: ret i32 %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_memory(i32* %x1)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load float* %x
+  store float %tmp, float* %y1
+  %p = select i1 %flag, i32* %x1, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
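
Note on the forwarding behavior (illustration only, not part of the patch):
FindAvailableLoadedValue now strips pointer casts from both the scanned
pointer and each load/store it inspects, and accepts a match whenever the
available value is bit-castable to the loaded type; each caller then
reconciles the types with a value-level bitcast (InstCombine via
Builder->CreateBitCast, JumpThreading via an explicit CastInst). Below is a
minimal hand-written sketch of what this enables, in the same typed-pointer
IR syntax as the tests above; the function and value names are hypothetical.

  ; Hypothetical input: the store and load use the same underlying pointer at
  ; different types, so the old code could not forward %x to the load.
  define float @forward_through_bitcast(i32* %p, i32 %x) {
  entry:
    store i32 %x, i32* %p
    %fp = bitcast i32* %p to float*
    %v = load float* %fp
    ret float %v
  }

  ; Expected result after -instcombine with this patch: the load folds to a
  ; bitcast of the stored value. The store itself survives because %p escapes
  ; through the argument list.
  define float @forward_through_bitcast(i32* %p, i32 %x) {
  entry:
    store i32 %x, i32* %p
    %v1 = bitcast i32 %x to float
    ret float %v1
  }

This only applies when CastInst::isBitCastable holds for the two types (for
example i32 <-> float, which have the same bit width); size-mismatched
accesses are still rejected, since FindAvailableLoadedValue requires
bit-castability and isSafeToLoadUnconditionally separately checks LoadSize
against the store size of the accessed type.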