mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-18 10:24:45 +00:00
DeadStoreElimination can now trim the size of a store if the end of it is dead.

Trimming is currently only done when the later store begins at a power-of-two offset or at a multiple of the earlier store's alignment, since in those cases shortening is unlikely to break a large store up into smaller ones.

Fixes <rdar://problem/10140300>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143630 91177308-0d34-0410-b5e6-96231b3b80d8
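As a rough source-level illustration (a hypothetical C++ analogue of the @write24to28 test added below, not code from this patch):

#include <cstring>

// The memset writes bytes 4..31 of the buffer; the store that follows
// rewrites bytes 28..31, so the last 4 bytes of the memset are dead.
// With this patch, DSE trims the memset length from 28 down to 24.
void write24to28(int *p) {
  std::memset(p + 1, 0, 28);  // covers byte offsets [4, 32)
  p[7] = 1;                   // covers byte offsets [28, 32)
}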
@@ -239,6 +239,24 @@ static bool isRemovable(Instruction *I) {
   }
 }
 
+/// isShortenable - Returns true if this instruction can be safely shortened in
+/// length.
+static bool isShortenable(Instruction *I) {
+  // Don't shorten stores for now
+  if (isa<StoreInst>(I))
+    return false;
+
+  IntrinsicInst *II = cast<IntrinsicInst>(I);
+  switch (II->getIntrinsicID()) {
+    default: return false;
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+      // Do shorten memory intrinsics.
+      return true;
+  }
+}
+
 /// getStoredPointerOperand - Return the pointer that is being written to.
 static Value *getStoredPointerOperand(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -293,11 +311,24 @@ static bool isObjectPointerWithTrustworthySize(const Value *V) {
   return false;
 }
 
-/// isCompleteOverwrite - Return true if a store to the 'Later' location
+namespace {
+  enum OverwriteResult
+  {
+    OverwriteComplete,
+    OverwriteEnd,
+    OverwriteUnknown
+  };
+}
+
+/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
 /// completely overwrites a store to the 'Earlier' location.
-static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
-                                const AliasAnalysis::Location &Earlier,
-                                AliasAnalysis &AA) {
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
+static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
+                                   const AliasAnalysis::Location &Earlier,
+                                   AliasAnalysis &AA,
+                                   int64_t& EarlierOff,
+                                   int64_t& LaterOff) {
   const Value *P1 = Earlier.Ptr->stripPointerCasts();
   const Value *P2 = Later.Ptr->stripPointerCasts();
 
@@ -311,23 +342,24 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
       // If we have no TargetData information around, then the size of the store
      // is inferrable from the pointee type.  If they are the same type, then
      // we know that the store is safe.
-      if (AA.getTargetData() == 0)
-        return Later.Ptr->getType() == Earlier.Ptr->getType();
-      return false;
+      if (AA.getTargetData() == 0 &&
+          Later.Ptr->getType() == Earlier.Ptr->getType())
+        return OverwriteComplete;
+
+      return OverwriteUnknown;
     }
 
     // Make sure that the Later size is >= the Earlier size.
-    if (Later.Size < Earlier.Size)
-      return false;
-    return true;
+    if (Later.Size >= Earlier.Size)
+      return OverwriteComplete;
   }
 
   // Otherwise, we have to have size information, and the later store has to be
   // larger than the earlier one.
   if (Later.Size == AliasAnalysis::UnknownSize ||
       Earlier.Size == AliasAnalysis::UnknownSize ||
-      Later.Size <= Earlier.Size || AA.getTargetData() == 0)
-    return false;
+      AA.getTargetData() == 0)
+    return OverwriteUnknown;
 
   // Check to see if the later store is to the entire object (either a global,
   // an alloca, or a byval argument).  If so, then it clearly overwrites any
@@ -340,26 +372,27 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
   // If we can't resolve the same pointers to the same object, then we can't
   // analyze them at all.
   if (UO1 != UO2)
-    return false;
+    return OverwriteUnknown;
 
   // If the "Later" store is to a recognizable object, get its size.
   if (isObjectPointerWithTrustworthySize(UO2)) {
     uint64_t ObjectSize =
       TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
     if (ObjectSize == Later.Size)
-      return true;
+      return OverwriteComplete;
   }
 
   // Okay, we have stores to two completely different pointers.  Try to
   // decompose the pointer into a "base + constant_offset" form.  If the base
   // pointers are equal, then we can reason about the two stores.
-  int64_t EarlierOff = 0, LaterOff = 0;
+  EarlierOff = 0;
+  LaterOff = 0;
   const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
   const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
 
   // If the base pointers still differ, we have two completely different stores.
   if (BP1 != BP2)
-    return false;
+    return OverwriteUnknown;
 
   // The later store completely overlaps the earlier store if:
   //
@@ -377,11 +410,24 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
   //
   // We have to be careful here as *Off is signed while *.Size is unsigned.
   if (EarlierOff >= LaterOff &&
+      Later.Size > Earlier.Size &&
       uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
-    return true;
+    return OverwriteComplete;
+
+  // The other interesting case is if the later store overwrites the end of
+  // the earlier store
+  //
+  //      |--earlier--|
+  //               |--   later   --|
+  //
+  // In this case we may want to trim the size of earlier to avoid generating
+  // writes to addresses which will definitely be overwritten later
+  if (LaterOff > EarlierOff &&
+      LaterOff + Later.Size >= EarlierOff + Earlier.Size)
+    return OverwriteEnd;
 
   // Otherwise, they don't completely overlap.
-  return false;
+  return OverwriteUnknown;
 }
 
 /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
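To see the two offset checks above in isolation, here is a minimal free-standing C++ sketch; it deliberately omits isOverwrite's earlier same-pointer and whole-object cases, and the name classify is hypothetical:

#include <cassert>
#include <cstdint>

enum OverwriteResult { OverwriteComplete, OverwriteEnd, OverwriteUnknown };

// Mirrors only the final interval tests of isOverwrite, with plain
// integers standing in for AliasAnalysis::Location offsets and sizes.
static OverwriteResult classify(int64_t EarlierOff, uint64_t EarlierSize,
                                int64_t LaterOff, uint64_t LaterSize) {
  if (EarlierOff >= LaterOff &&
      LaterSize > EarlierSize &&
      uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
    return OverwriteComplete;   // later write covers all of the earlier one
  if (LaterOff > EarlierOff &&
      uint64_t(LaterOff) + LaterSize >= uint64_t(EarlierOff) + EarlierSize)
    return OverwriteEnd;        // later write covers the earlier one's tail
  return OverwriteUnknown;
}

int main() {
  // @write24to28 below: memset covers [4, 32), the store covers [28, 32).
  assert(classify(4, 28, 28, 4) == OverwriteEnd);
  // A 32-byte store at offset 0 swallows the whole memset.
  assert(classify(4, 28, 0, 32) == OverwriteComplete);
  return 0;
}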
@@ -505,22 +551,52 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
       // If we find a write that is a) removable (i.e., non-volatile), b) is
       // completely obliterated by the store to 'Loc', and c) which we know that
       // 'Inst' doesn't load from, then we can remove it.
-      if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+      if (isRemovable(DepWrite) &&
           !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
-        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
-              << *DepWrite << "\n  KILLER: " << *Inst << '\n');
-
-        // Delete the store and now-dead instructions that feed it.
-        DeleteDeadInstruction(DepWrite, *MD);
-        ++NumFastStores;
-        MadeChange = true;
-
-        // DeleteDeadInstruction can delete the current instruction in loop
-        // cases, reset BBI.
-        BBI = Inst;
-        if (BBI != BB.begin())
-          --BBI;
-        break;
+        int64_t InstWriteOffset, DepWriteOffset;
+        OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
+                                         DepWriteOffset, InstWriteOffset);
+        if (OR == OverwriteComplete) {
+          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
+                << *DepWrite << "\n  KILLER: " << *Inst << '\n');
+
+          // Delete the store and now-dead instructions that feed it.
+          DeleteDeadInstruction(DepWrite, *MD);
+          ++NumFastStores;
+          MadeChange = true;
+
+          // DeleteDeadInstruction can delete the current instruction in loop
+          // cases, reset BBI.
+          BBI = Inst;
+          if (BBI != BB.begin())
+            --BBI;
+          break;
+        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+          // TODO: base this on the target vector size so that if the earlier
+          // store was too small to get vector writes anyway then its likely
+          // a good idea to shorten it
+          // Power of 2 vector writes are probably always a bad idea to optimize
+          // as any store/memset/memcpy is likely using vector instructions so
+          // shortening it to not vector size is likely to be slower
+          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+          if (llvm::isPowerOf2_64(InstWriteOffset) ||
+              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
+                  << *DepWrite << "\n  KILLER (offset "
+                  << InstWriteOffset << ", "
+                  << DepLoc.Size << ")"
+                  << *Inst << '\n');
+
+            Value* DepWriteLength = DepIntrinsic->getLength();
+            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
+                                                    InstWriteOffset -
+                                                    DepWriteOffset);
+            DepIntrinsic->setLength(TrimmedLength);
+            MadeChange = true;
+          }
+        }
       }
 
       // If this is a may-aliased store that is clobbering the store value, we
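The shortening branch is easiest to follow with the @write24to28 numbers plugged in. A hedged worked example, checked at compile time; the constants are taken from the test below, not from pass output:

#include <cstdint>

constexpr int64_t  DepWriteOffset  = 4;   // memset begins at p+4
constexpr int64_t  InstWriteOffset = 28;  // killing store begins at p+28
constexpr uint64_t DepWriteAlign   = 4;   // memset alignment in the test

// Guard: 28 is not a power of two, but it is a multiple of the memset's
// alignment, so the shortened memset stays as well-aligned as before.
static_assert(InstWriteOffset % DepWriteAlign == 0, "shortening allowed");

// Trim: the memset keeps [4, 28) and the store owns [28, 32).
static_assert(InstWriteOffset - DepWriteOffset == 24, "new memset length");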
test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll | 78 lines (new file)
@@ -0,0 +1,78 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.vec2 = type { <4 x i32>, <4 x i32> }
+%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
+
+@glob1 = global %struct.vec2 zeroinitializer, align 16
+@glob2 = global %struct.vec2plusi zeroinitializer, align 16
+
+define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write24to28
+entry:
+  %arrayidx0 = getelementptr inbounds i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write28to32
+entry:
+  %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @dontwrite28to32memset
+entry:
+  %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write32to36
+entry:
+  %0 = bitcast %struct.vec2plusi* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
+  %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
+  store i32 1, i32* %c, align 4
+  ret void
+}
+
+define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write16to32
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+  %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
+  ret void
+}
+
+define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK: @dontwrite28to32memcpy
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+  %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
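Note the negative tests: in both @dontwrite28to32memset and @dontwrite28to32memcpy the cut point is byte 28 of a 16-byte-aligned, 32-byte write, which is neither a power of two nor a multiple of the alignment, so the length is deliberately left at 32. The other four functions satisfy one of the two guards and are trimmed.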