Handle forwarding local memsets to loads.  For example, we optimize this:

  short x(short *A) { memset(A, 1, sizeof(*A)*100); return A[42]; }

to 'return 257' instead of doing the load.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90695 91177308-0d34-0410-b5e6-96231b3b80d8
commit faf815b938
parent 90ec9680a9
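
A quick sanity check on the '257': memset stores the byte 0x01 into every byte of the array, so both bytes backing A[42] are 0x01 and the i16 load reads 0x0101 = 257 on either endianness (all bytes are equal). A minimal standalone check of the arithmetic (ours, not part of the commit):

  #include <cassert>
  #include <cstring>

  int main() {
    short A[100];
    std::memset(A, 1, sizeof(*A) * 100); // every byte of A becomes 0x01
    assert(A[42] == 257);                // a 16-bit load reads 0x0101 == 257
    return 0;
  }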
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -987,25 +988,24 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
 }
 
-/// AnalyzeLoadFromClobberingStore - This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store.  This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias.  Check this case to see if there is
-/// anything more we can do before we give up.  This returns -1 if we have to
-/// give up, or a byte number in the stored value of the piece that feeds the
-/// load.
-static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove).  This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up.  This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
+                                          uint64_t WriteSizeInBits,
                                           const TargetData &TD) {
   // If the loaded or stored value is an first class array or struct, don't try
   // to transform them.  We need to be able to bitcast to integer.
-  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) ||
-      isa<StructType>(DepSI->getOperand(0)->getType()) ||
-      isa<ArrayType>(DepSI->getOperand(0)->getType()))
+  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()))
     return -1;
 
   int64_t StoreOffset = 0, LoadOffset = 0;
-  Value *StoreBase =
-    GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
+  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
   Value *LoadBase =
     GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
   if (StoreBase != LoadBase)
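
To make the return contract concrete: for the example in the commit message, the memset and the load share the base %A, the write covers bytes [0, 200) at StoreOffset 0, and the i16 load of A[42] reads bytes [84, 86), so the analysis returns 84. A simplified model of the covering check the remainder of this function performs (our sketch with invented names, not code from the commit):

  // Byte offset of the load within the write, or -1 when the write does
  // not fully cover the bytes the load reads.
  static int coveringOffset(int64_t StoreOffset, uint64_t StoreSizeBytes,
                            int64_t LoadOffset, uint64_t LoadSizeBytes) {
    if (LoadOffset < StoreOffset)
      return -1;                 // load starts before the written region
    if (uint64_t(LoadOffset - StoreOffset) + LoadSizeBytes > StoreSizeBytes)
      return -1;                 // load runs past the end of the write
    return int(LoadOffset - StoreOffset);  // e.g. 84 for A[42] with i16
  }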
@@ -1018,8 +1018,8 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
 #if 0
   errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
          << "Base       = " << *StoreBase << "\n"
-         << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
-         << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+         << "Store Ptr  = " << *WritePtr << "\n"
+         << "Store Offs = " << StoreOffset << "\n"
          << "Load Ptr   = " << *L->getPointerOperand() << "\n"
          << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
   errs() << "'" << L->getParent()->getParent()->getName() << "'"
@@ -1033,12 +1033,11 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
   // must have gotten confused.
   // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
   // remove this check, as it is duplicated with what we have below.
-  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
   uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
 
-  if ((StoreSize & 7) | (LoadSize & 7))
+  if ((WriteSizeInBits & 7) | (LoadSize & 7))
     return -1;
-  StoreSize >>= 3;  // Convert to bytes.
+  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
   LoadSize >>= 3;
 
 
@@ -1052,8 +1051,8 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
 #if 0
   errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
          << "Base       = " << *StoreBase << "\n"
-         << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
-         << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+         << "Store Ptr  = " << *WritePtr << "\n"
+         << "Store Offs = " << StoreOffset << "\n"
          << "Load Ptr   = " << *L->getPointerOperand() << "\n"
          << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
   errs() << "'" << L->getParent()->getParent()->getName() << "'"
@@ -1075,6 +1074,34 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
   return LoadOffset-StoreOffset;
 }
 
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+                                          const TargetData &TD) {
+  // Cannot handle reading from store of first-class aggregate yet.
+  if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
+      isa<ArrayType>(DepSI->getOperand(0)->getType()))
+    return -1;
+
+  Value *StorePtr = DepSI->getPointerOperand();
+  uint64_t StoreSize = TD.getTypeSizeInBits(StorePtr->getType());
+  return AnalyzeLoadFromClobberingWrite(L, StorePtr, StoreSize, TD);
+}
+
+static int AnalyzeLoadFromClobberingMemInst(LoadInst *L, MemIntrinsic *MI,
+                                            const TargetData &TD) {
+  // If the mem operation is a non-constant size, we can't handle it.
+  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+  if (SizeCst == 0) return -1;
+  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
+
+  if (MI->getIntrinsicID() == Intrinsic::memset)
+    return AnalyzeLoadFromClobberingWrite(L, MI->getDest(), MemSizeInBits, TD);
+
+  // Unhandled memcpy/memmove.
+  return -1;
+}
+
+
 /// GetStoreValueForLoad - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering store.  This means
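
One detail worth flagging in the new AnalyzeLoadFromClobberingStore wrapper above: TD.getTypeSizeInBits(StorePtr->getType()) measures the store's pointer type, so StoreSize is the pointer width rather than the width of the value being stored (the quantity the deleted line in an earlier hunk computed from DepSI->getOperand(0)->getType()). A sketch of the presumably intended computation, our reading rather than code from this commit:

  Value *StorePtr = DepSI->getPointerOperand();
  // Measure the value being stored, not the pointer stored through.
  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
  return AnalyzeLoadFromClobberingWrite(L, StorePtr, StoreSize, TD);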
@@ -1100,11 +1127,10 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
 
   // Shift the bits to the least significant depending on endianness.
   unsigned ShiftAmt;
-  if (TD.isLittleEndian()) {
+  if (TD.isLittleEndian())
     ShiftAmt = Offset*8;
-  } else {
+  else
     ShiftAmt = (StoreSize-LoadSize-Offset)*8;
-  }
 
   if (ShiftAmt)
     SrcVal = BinaryOperator::CreateLShr(SrcVal,
@@ -1117,6 +1143,52 @@
   return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
 }
 
+/// GetMemInstValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+                                     const Type *LoadTy, Instruction *InsertPt,
+                                     const TargetData &TD){
+  LLVMContext &Ctx = LoadTy->getContext();
+  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+  // We know that this method is only called when the mem transfer fully
+  // provides the bits for the load.
+  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+    // independently of what the offset is.
+    Value *Val = MSI->getValue();
+    if (LoadSize != 1)
+      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
+
+    Value *OneElt = Val;
+
+    // Splat the value out to the right number of bits.
+    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
+      // If we can double the number of bytes set, do it.
+      if (NumBytesSet*2 <= LoadSize) {
+        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
+        Val = Builder.CreateOr(Val, ShVal);
+        NumBytesSet <<= 1;
+        continue;
+      }
+
+      // Otherwise insert one byte at a time.
+      Value *ShVal = Builder.CreateShl(Val, 1*8);
+      Val = Builder.CreateOr(OneElt, ShVal);
+      ++NumBytesSet;
+    }
+
+    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+  }
+
+  // ABORT;
+  return 0;
+}
+
+
+
 struct AvailableValueInBlock {
   /// BB - The basic block in question.
   BasicBlock *BB;
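
The splat loop above doubles the number of filled bytes each iteration and falls back to appending one byte when doubling would overshoot, so an N-byte splat needs only O(log N) shift/or pairs. A constant-folded model of the same algorithm on plain integers rather than IRBuilder instructions (our sketch, not code from the commit):

  #include <cstdint>

  // splatByte(1, 2) == 0x0101 == 257, as in the commit message example.
  static uint64_t splatByte(uint8_t Byte, unsigned LoadSize) {
    uint64_t Val = Byte, OneElt = Byte;   // already zero-extended
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      if (NumBytesSet * 2 <= LoadSize) {  // double the bytes already set
        Val |= Val << (NumBytesSet * 8);
        NumBytesSet <<= 1;
        continue;
      }
      Val = OneElt | (Val << 8);          // otherwise append a single byte
      ++NumBytesSet;
    }
    return Val;
  }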
@@ -1251,8 +1323,21 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
         }
       }
     }
+#if 0
+    // If the clobbering value is a memset/memcpy/memmove, see if we can
+    // forward a value on from it.
+    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+      if (TD == 0)
+        TD = getAnalysisIfAvailable<TargetData>();
+      if (TD) {
+        int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
+        if (Offset != -1)
+          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+      }
+    }
+#endif
 
     // FIXME: Handle memset/memcpy.
     UnavailableBlocks.push_back(DepBB);
     continue;
   }
@@ -1526,11 +1611,6 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
 
   // If the value isn't available, don't do anything!
   if (Dep.isClobber()) {
-    // FIXME: We should handle memset/memcpy/memmove as dependent instructions
-    // to forward the value if available.
-    //if (isa<MemIntrinsic>(Dep.getInst()))
-    //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";
-
     // Check to see if we have something like this:
     //   store i32 123, i32* %P
     //   %A = bitcast i32* %P to i8*
@@ -1541,25 +1621,38 @@
     // a common base + constant offset, and if the previous store (or memset)
     // completely covers this load.  This sort of thing can happen in bitfield
     // access code.
+    Value *AvailVal = 0;
     if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
       if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
         int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
-        if (Offset != -1) {
-          Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
-                                                 L->getType(), L, *TD);
-          DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
-                       << *AvailVal << '\n' << *L << "\n\n\n");
-
-          // Replace the load!
-          L->replaceAllUsesWith(AvailVal);
-          if (isa<PointerType>(AvailVal->getType()))
-            MD->invalidateCachedPointerInfo(AvailVal);
-          toErase.push_back(L);
-          NumGVNLoad++;
-          return true;
-        }
+        if (Offset != -1)
+          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+                                          L->getType(), L, *TD);
       }
+
+    // If the clobbering value is a memset/memcpy/memmove, see if we can forward
+    // a value on from it.
+    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+        int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
+        if (Offset != -1)
+          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+      }
+    }
+
+    if (AvailVal) {
+      DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+                   << *AvailVal << '\n' << *L << "\n\n\n");
+
+      // Replace the load!
+      L->replaceAllUsesWith(AvailVal);
+      if (isa<PointerType>(AvailVal->getType()))
+        MD->invalidateCachedPointerInfo(AvailVal);
+      toErase.push_back(L);
+      NumGVNLoad++;
+      return true;
+    }
+
     DEBUG(
       // fast print dep, using operator<< on instruction would be too slow
       errs() << "GVN: load ";
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -131,6 +131,43 @@ define i8* @coerce_mustalias7(i64 %V, i64* %P) {
 ; CHECK: ret i8*
 }
 
+; memset -> i16 forwarding.
+define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
+entry:
+  %conv = bitcast i16* %A to i8*
+  tail call void @llvm.memset.i64(i8* %conv, i8 1, i64 200, i32 1)
+  %arrayidx = getelementptr inbounds i16* %A, i64 42
+  %tmp2 = load i16* %arrayidx
+  ret i16 %tmp2
+; CHECK: @memset_to_i16_local
+; CHECK-NOT: load
+; CHECK: ret i16 257
+}
+
+; memset -> float forwarding.
+define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
+entry:
+  %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
+  tail call void @llvm.memset.i64(i8* %conv, i8 %Val, i64 400, i32 1)
+  %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
+  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
+  ret float %tmp2
+; CHECK: @memset_to_float_local
+; CHECK-NOT: load
+; CHECK: zext
+; CHECK-NEXT: shl
+; CHECK-NEXT: or
+; CHECK-NEXT: shl
+; CHECK-NEXT: or
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret float
+}
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
+
+
+
 ;; non-local i32/float -> i8 load forwarding.
 define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
   %P2 = bitcast i32* %P to float*
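
In the float test, %Val is a runtime i8, so the splat cannot fold to a constant the way it does in the i16 test; the CHECK lines instead pin down the unrolled sequence for a 4-byte load: one zext, two double-the-filled-bytes steps (shl 8 + or, then shl 16 + or), and a bitcast to float. The same arithmetic on plain integers (our sketch, assuming the splat loop unrolls this way for a 4-byte load):

  #include <cstdint>

  // Unrolled 4-byte splat matching the CHECK lines in memset_to_float_local.
  static uint32_t splat4(uint8_t Val) {
    uint32_t V = Val;   // zext i8 -> i32
    V |= V << 8;        // shl 8, or: 1 byte set -> 2 bytes set
    V |= V << 16;       // shl 16, or: 2 bytes set -> 4 bytes set
    return V;           // the IR then bitcasts this i32 to float
  }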