From 28eb1c5217416aa60b06b8b569a5de8047f75514 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 4 Nov 2011 00:48:26 +0000 Subject: [PATCH] Speculatively revert "DeadStoreElimination can now trim the size of a store if the end of it is dead.", which appears to break bootstrapping LLVM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143668 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/DeadStoreElimination.cpp | 140 ++++-------------- .../DeadStoreElimination/OverwriteStoreEnd.ll | 78 ---------- 2 files changed, 32 insertions(+), 186 deletions(-) delete mode 100644 test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 921bec8f4de..c0738a951c4 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -239,24 +239,6 @@ static bool isRemovable(Instruction *I) { } } - -/// isShortenable - Returns true if this instruction can be safely shortened in -/// length. -static bool isShortenable(Instruction *I) { - // Don't shorten stores for now - if (isa(I)) - return false; - - IntrinsicInst *II = cast(I); - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: - case Intrinsic::memcpy: - // Do shorten memory intrinsics. - return true; - } -} - /// getStoredPointerOperand - Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast(I)) @@ -311,24 +293,11 @@ static bool isObjectPointerWithTrustworthySize(const Value *V) { return false; } -namespace { - enum OverwriteResult - { - OverwriteComplete, - OverwriteEnd, - OverwriteUnknown - }; -} - -/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location +/// isCompleteOverwrite - Return true if a store to the 'Later' location /// completely overwrites a store to the 'Earlier' location. -/// 'OverwriteEnd' if the end of the 'Earlier' location is completely -/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined -static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, - const AliasAnalysis::Location &Earlier, - AliasAnalysis &AA, - int64_t& EarlierOff, - int64_t& LaterOff) { +static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, + const AliasAnalysis::Location &Earlier, + AliasAnalysis &AA) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -342,24 +311,23 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // If we have no TargetData information around, then the size of the store // is inferrable from the pointee type. If they are the same type, then // we know that the store is safe. - if (AA.getTargetData() == 0 && - Later.Ptr->getType() == Earlier.Ptr->getType()) - return OverwriteComplete; - - return OverwriteUnknown; + if (AA.getTargetData() == 0) + return Later.Ptr->getType() == Earlier.Ptr->getType(); + return false; } // Make sure that the Later size is >= the Earlier size. - if (Later.Size >= Earlier.Size) - return OverwriteComplete; + if (Later.Size < Earlier.Size) + return false; + return true; } // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || Earlier.Size == AliasAnalysis::UnknownSize || - AA.getTargetData() == 0) - return OverwriteUnknown; + Later.Size <= Earlier.Size || AA.getTargetData() == 0) + return false; // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any @@ -372,27 +340,26 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. if (UO1 != UO2) - return OverwriteUnknown; + return false; // If the "Later" store is to a recognizable object, get its size. if (isObjectPointerWithTrustworthySize(UO2)) { uint64_t ObjectSize = TD.getTypeAllocSize(cast(UO2->getType())->getElementType()); if (ObjectSize == Later.Size) - return OverwriteComplete; + return true; } // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - EarlierOff = 0; - LaterOff = 0; + int64_t EarlierOff = 0, LaterOff = 0; const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) - return OverwriteUnknown; + return false; // The later store completely overlaps the earlier store if: // @@ -410,24 +377,11 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && - Later.Size > Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) - return OverwriteComplete; - - // The other interesting case is if the later store overwrites the end of - // the earlier store - // - // |--earlier--| - // |-- later --| - // - // In this case we may want to trim the size of earlier to avoid generating - // writes to addresses which will definitely be overwritten later - if (LaterOff > EarlierOff && - LaterOff + Later.Size >= EarlierOff + Earlier.Size) - return OverwriteEnd; + return true; // Otherwise, they don't completely overlap. - return OverwriteUnknown; + return false; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a @@ -551,52 +505,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // If we find a write that is a) removable (i.e., non-volatile), b) is // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && + if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, - DepWriteOffset, InstWriteOffset); - if (OR == OverwriteComplete) { - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " - << *DepWrite << "\n KILLER: " << *Inst << '\n'); + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); - // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); - ++NumFastStores; - MadeChange = true; - - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. - BBI = Inst; - if (BBI != BB.begin()) - --BBI; - break; - } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { - // TODO: base this on the target vector size so that if the earlier - // store was too small to get vector writes anyway then its likely - // a good idea to shorten it - // Power of 2 vector writes are probably always a bad idea to optimize - // as any store/memset/memcpy is likely using vector instructions so - // shortening it to not vector size is likely to be slower - MemIntrinsic* DepIntrinsic = cast(DepWrite); - unsigned DepWriteAlign = DepIntrinsic->getAlignment(); - if (llvm::isPowerOf2_64(InstWriteOffset) || - ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { - - DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " - << *DepWrite << "\n KILLER (offset " - << InstWriteOffset << ", " - << DepLoc.Size << ")" - << *Inst << '\n'); - - Value* DepWriteLength = DepIntrinsic->getLength(); - Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), - InstWriteOffset - - DepWriteOffset); - DepIntrinsic->setLength(TrimmedLength); - MadeChange = true; - } - } + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD); + ++NumFastStores; + MadeChange = true; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst; + if (BBI != BB.begin()) + --BBI; + break; } // If this is a may-aliased store that is clobbering the store value, we diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll deleted file mode 100644 index 828ccc57a44..00000000000 --- a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll +++ /dev/null @@ -1,78 +0,0 @@ -; RUN: opt < %s -basicaa -dse -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -%struct.vec2 = type { <4 x i32>, <4 x i32> } -%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 } - -@glob1 = global %struct.vec2 zeroinitializer, align 16 -@glob2 = global %struct.vec2plusi zeroinitializer, align 16 - -define void @write24to28(i32* nocapture %p) nounwind uwtable ssp { -; CHECK: @write24to28 -entry: - %arrayidx0 = getelementptr inbounds i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false) - call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) - %arrayidx1 = getelementptr inbounds i32* %p, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @write28to32(i32* nocapture %p) nounwind uwtable ssp { -; CHECK: @write28to32 -entry: - %p3 = bitcast i32* %p to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) - call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false) - %arrayidx1 = getelementptr inbounds i32* %p, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp { -; CHECK: @dontwrite28to32memset -entry: - %p3 = bitcast i32* %p to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false) - call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false) - %arrayidx1 = getelementptr inbounds i32* %p, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp { -; CHECK: @write32to36 -entry: - %0 = bitcast %struct.vec2plusi* %p to i8* -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false) - %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2 - store i32 1, i32* %c, align 4 - ret void -} - -define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK: @write16to32 -entry: - %0 = bitcast %struct.vec2* %p to i8* -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false) - %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1 - store <4 x i32> , <4 x i32>* %c, align 4 - ret void -} - -define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK: @dontwrite28to32memcpy -entry: - %0 = bitcast %struct.vec2* %p to i8* -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false) - %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind