Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-01-14 16:33:28 +00:00
enable non-local analysis and PRE of large store -> little load.
This doesn't kick in too much because of phi translation issues, but this can be resolved in the future. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82447 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 879135145f
commit 4fbd14e80e
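The pattern this patch targets: a wide store on one path and a narrow load of some of its bytes later on. A hypothetical C++-level illustration of the kind of code that benefits; the function and names below are illustrative only, not taken from the patch:

    // Hypothetical source-level pattern (not from the patch): a 32-bit store
    // on one path feeds an 8-bit load of its third byte further down.
    #include <cstdint>
    #include <cstring>

    uint8_t third_byte(uint32_t *p, uint32_t v, bool cond) {
      if (cond)
        *p = v;                                  // wide store (i32 at IR level)
      uint8_t b;
      std::memcpy(&b, reinterpret_cast<const uint8_t *>(p) + 2, 1);  // narrow load (i8)
      // With this patch, GVN can forward byte 2 of v on the 'cond' path and,
      // in the PRE case, insert the byte load only on the path without the store.
      return b;
    }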
@@ -1026,7 +1026,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
 /// be expressed as a base pointer plus a constant offset. Return the base and
 /// offset to the caller.
 static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                        const TargetData *TD) {
+                                        const TargetData &TD) {
   Operator *PtrOp = dyn_cast<Operator>(Ptr);
   if (PtrOp == 0) return Ptr;
 
@@ -1046,16 +1046,16 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
 
     // Handle a struct and array indices which add their offset to the pointer.
     if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
-      Offset += TD->getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
     } else {
-      uint64_t Size = TD->getTypeAllocSize(GTI.getIndexedType());
+      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
       Offset += OpC->getSExtValue()*Size;
     }
   }
 
   // Re-sign extend from the pointer size if needed to get overflow edge cases
   // right.
-  unsigned PtrSize = TD->getPointerSizeInBits();
+  unsigned PtrSize = TD.getPointerSizeInBits();
   if (PtrSize < 64)
     Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
 
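The re-sign-extend step above is the subtle part of GetBaseWithConstantOffset: offsets are accumulated as 64-bit integers but must wrap as if computed at the target's pointer width. A self-contained sketch of just that step, with hypothetical names, using an unsigned left shift in place of the patch's (Offset << n) >> n idiom to keep the arithmetic well defined:

    #include <cassert>
    #include <cstdint>

    // Truncate a 64-bit accumulated byte offset back to the target's pointer
    // width, preserving sign, so overflow edge cases match pointer arithmetic.
    // (Relies on arithmetic right shift, as the original idiom does.)
    int64_t signExtendToPointerWidth(int64_t Offset, unsigned PtrSizeInBits) {
      if (PtrSizeInBits < 64) {
        uint64_t U = static_cast<uint64_t>(Offset) << (64 - PtrSizeInBits);
        Offset = static_cast<int64_t>(U) >> (64 - PtrSizeInBits);
      }
      return Offset;
    }

    int main() {
      assert(signExtendToPointerWidth(-8, 32) == -8);
      // 2^31 becomes a negative offset once reinterpreted at 32-bit pointer width.
      assert(signExtendToPointerWidth(0x80000000LL, 32) == INT32_MIN);
      assert(signExtendToPointerWidth(0x80000000LL, 64) == 0x80000000LL);
      return 0;
    }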
@@ -1071,12 +1071,12 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
 /// give up, or a byte number in the stored value of the piece that feeds the
 /// load.
 static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
-                                          const TargetData *TD) {
+                                          const TargetData &TD) {
   int64_t StoreOffset = 0, LoadOffset = 0;
   Value *StoreBase =
     GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
   Value *LoadBase =
     GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
   if (StoreBase != LoadBase)
     return -1;
 
@@ -1102,8 +1102,8 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
   // must have gotten confused.
   // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
   // remove this check, as it is duplicated with what we have below.
-  uint64_t StoreSize = TD->getTypeSizeInBits(DepSI->getOperand(0)->getType());
-  uint64_t LoadSize = TD->getTypeSizeInBits(L->getType());
+  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
 
   if ((StoreSize & 7) | (LoadSize & 7))
     return -1;
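AnalyzeLoadFromClobberingStore succeeds only when both pointers decompose to the same base and the loaded bytes fall entirely inside the stored bytes; the return value is then the byte offset of the load within the stored value. A simplified standalone sketch of that containment test (hypothetical helper, byte-granular only, omitting the real routine's other bail-outs):

    #include <cstdint>

    // Byte-granular containment check (same base already established): return
    // the load's byte offset inside the stored value, or -1 if not covered.
    int loadOffsetWithinStore(int64_t StoreOffset, uint64_t StoreSizeInBytes,
                              int64_t LoadOffset, uint64_t LoadSizeInBytes) {
      if (LoadOffset < StoreOffset)
        return -1;                     // load begins before the stored bytes
      int64_t Delta = LoadOffset - StoreOffset;
      if (static_cast<uint64_t>(Delta) + LoadSizeInBytes > StoreSizeInBytes)
        return -1;                     // load reads past the stored bytes
      return static_cast<int>(Delta);
    }

    // e.g. store i32 at base+0, load i8 at base+2:
    //   loadOffsetWithinStore(0, 4, 2, 1) == 2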
@@ -1150,37 +1150,40 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
 /// that the store *may* provide bits used by the load but we can't be sure
 /// because the pointers don't mustalias. Check this case to see if there is
 /// anything more we can do before we give up.
-static Value *GetStoreValueForLoad(Value *SrcVal, int Offset,const Type *LoadTy,
-                                   Instruction *InsertPt, const TargetData *TD){
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
+                                   const Type *LoadTy,
+                                   Instruction *InsertPt, const TargetData &TD){
   LLVMContext &Ctx = SrcVal->getType()->getContext();
 
-  uint64_t StoreSize = TD->getTypeSizeInBits(SrcVal->getType())/8;
-  uint64_t LoadSize = TD->getTypeSizeInBits(LoadTy)/8;
+  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
+  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
 
 
   // Compute which bits of the stored value are being used by the load. Convert
   // to an integer type to start with.
   if (isa<PointerType>(SrcVal->getType()))
-    SrcVal = new PtrToIntInst(SrcVal, TD->getIntPtrType(Ctx), "tmp", InsertPt);
+    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
   if (!isa<IntegerType>(SrcVal->getType()))
     SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                              "tmp", InsertPt);
 
   // Shift the bits to the least significant depending on endianness.
   unsigned ShiftAmt;
-  if (TD->isLittleEndian()) {
+  if (TD.isLittleEndian()) {
     ShiftAmt = Offset*8;
   } else {
     ShiftAmt = StoreSize-LoadSize-Offset;
   }
 
-  SrcVal = BinaryOperator::CreateLShr(SrcVal,
-                 ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
+  if (ShiftAmt)
+    SrcVal = BinaryOperator::CreateLShr(SrcVal,
+                 ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
 
-  SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
-                         "tmp", InsertPt);
+  if (LoadSize != StoreSize)
+    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+                           "tmp", InsertPt);
 
-  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, *TD);
+  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
 }
 
 struct AvailableValueInBlock {
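Once the byte offset is known, GetStoreValueForLoad treats the stored value as raw bits: shift the wanted bytes down to bit 0 (by Offset*8 on a little-endian target) and truncate to the load's width. A little-endian arithmetic sketch of the lshr + trunc sequence it emits; the helper name and example constants are illustrative, not from the patch:

    #include <cassert>
    #include <cstdint>

    // Little-endian model of the emitted IR: lshr by Offset*8, then trunc.
    uint64_t extractBytesLE(uint64_t StoredBits, unsigned Offset,
                            unsigned LoadSizeInBytes) {
      uint64_t Shifted = StoredBits >> (Offset * 8);           // lshr
      if (LoadSizeInBytes >= 8)
        return Shifted;                                        // no trunc needed
      return Shifted & ((1ULL << (LoadSizeInBytes * 8)) - 1);  // trunc
    }

    int main() {
      assert(extractBytesLE(0xAABBCCDDu, 1, 1) == 0xCC);   // byte 1 of an i32
      assert(extractBytesLE(0xAABBCCDDu, 2, 2) == 0xAABB); // i16 at byte 2
      return 0;
    }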
@@ -1188,11 +1191,15 @@ struct AvailableValueInBlock {
   BasicBlock *BB;
   /// V - The value that is live out of the block.
   Value *V;
+  /// Offset - The byte offset in V that is interesting for the load query.
+  unsigned Offset;
 
-  static AvailableValueInBlock get(BasicBlock *BB, Value *V) {
+  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+                                   unsigned Offset = 0) {
     AvailableValueInBlock Res;
     Res.BB = BB;
     Res.V = V;
+    Res.Offset = Offset;
     return Res;
   }
 };
@@ -1209,14 +1216,23 @@ GetAvailableBlockValues(DenseMap<BasicBlock*, Value*> &BlockReplValues,
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
     BasicBlock *BB = ValuesPerBlock[i].BB;
     Value *AvailableVal = ValuesPerBlock[i].V;
+    unsigned Offset = ValuesPerBlock[i].Offset;
 
     Value *&BlockEntry = BlockReplValues[BB];
     if (BlockEntry) continue;
 
     if (AvailableVal->getType() != LoadTy) {
       assert(TD && "Need target data to handle type mismatch case");
-      AvailableVal = CoerceAvailableValueToLoadType(AvailableVal, LoadTy,
-                                                    BB->getTerminator(), *TD);
+      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+                                          BB->getTerminator(), *TD);
+
+      if (Offset) {
+        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
+                     << *ValuesPerBlock[i].V << '\n'
+                     << *AvailableVal << '\n' << "\n\n\n");
+      }
+
+
       DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
                    << *ValuesPerBlock[i].V << '\n'
                    << *AvailableVal << '\n' << "\n\n\n");
@@ -1267,6 +1283,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     MemDepResult DepInfo = Deps[i].second;
 
     if (DepInfo.isClobber()) {
+      // If the dependence is to a store that writes to a superset of the bits
+      // read by the load, we can extract the bits we need for the load from the
+      // stored value.
+      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+        if (TD == 0)
+          TD = getAnalysisIfAvailable<TargetData>();
+        if (TD) {
+          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
+          if (Offset != -1) {
+            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+                                                         DepSI->getOperand(0),
+                                                                Offset));
+            continue;
+          }
+        }
+      }
+
+      // FIXME: Handle memset/memcpy.
       UnavailableBlocks.push_back(DepBB);
       continue;
     }
@@ -1299,8 +1333,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
 
       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                           S->getOperand(0)));
+      continue;
+    }
 
-    } else if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
       // If the types mismatch and we can't handle it, reject reuse of the load.
       if (LD->getType() != LI->getType()) {
         if (TD == 0)
@@ -1316,11 +1352,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
         }
       }
       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
-    } else {
-      // FIXME: Handle memset/memcpy.
-      UnavailableBlocks.push_back(DepBB);
       continue;
     }
+
+    UnavailableBlocks.push_back(DepBB);
+    continue;
   }
 
   // If we have no predecessors that produce a known value for this load, exit
@@ -1550,10 +1586,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   // access code.
   if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
     if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
-      int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, TD);
+      int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
       if (Offset != -1) {
         Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
-                                               L->getType(), L, TD);
+                                               L->getType(), L, *TD);
         DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
                      << *AvailVal << '\n' << *L << "\n\n\n");
 
@@ -199,7 +199,7 @@ Cont:
 ;; types, and the reload is an offset from the store pointer.
 ;;===----------------------------------------------------------------------===;;
 
-;; i32 -> f32 forwarding.
+;; i32 -> i8 forwarding.
 ;; PR4216
 define i8 @coerce_offset0(i32 %V, i32* %P) {
   store i32 %V, i32* %P
@@ -214,5 +214,55 @@ define i8 @coerce_offset0(i32 %V, i32* %P) {
 ; CHECK: ret i8
 }
 
+;; non-local i32/float -> i8 load forwarding.
+define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
+  %P2 = bitcast i32* %P to float*
+  %P3 = bitcast i32* %P to i8*
+  %P4 = getelementptr i8* %P3, i32 2
+  br i1 %cond, label %T, label %F
+T:
+  store i32 42, i32* %P
+  br label %Cont
+
+F:
+  store float 1.0, float* %P2
+  br label %Cont
+
+Cont:
+  %A = load i8* %P4
+  ret i8 %A
+
+; CHECK: @coerce_offset_nonlocal0
+; CHECK: Cont:
+; CHECK: %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
+}
+
+
+;; non-local i32 -> i8 partial redundancy load forwarding.
+define i8 @coerce_offset_pre0(i32* %P, i1 %cond) {
+  %P3 = bitcast i32* %P to i8*
+  %P4 = getelementptr i8* %P3, i32 2
+  br i1 %cond, label %T, label %F
+T:
+  store i32 42, i32* %P
+  br label %Cont
+
+F:
+  br label %Cont
+
+Cont:
+  %A = load i8* %P4
+  ret i8 %A
+
+; CHECK: @coerce_offset_pre0
+; CHECK: F:
+; CHECK: load i8* %P4
+; CHECK: Cont:
+; CHECK: %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
+}
 
 
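Assuming a little-endian target and IEEE-754 floats, the byte these tests expect GVN to forward can be computed by hand. The following standalone trace of @coerce_offset_nonlocal0 (not part of the test suite) shows the two incoming values the new phi in Cont merges:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Byte 'Offset' of a 32-bit value on a little-endian layout.
    static uint8_t byteAt(uint32_t Bits, unsigned Offset) {
      return static_cast<uint8_t>(Bits >> (Offset * 8));
    }

    int main() {
      // T path: store i32 42 = 0x0000002A, so byte 2 is 0x00.
      assert(byteAt(42u, 2) == 0x00);

      // F path: store float 1.0, bit pattern 0x3F800000, so byte 2 is 0x80.
      float One = 1.0f;
      uint32_t FloatBits;
      std::memcpy(&FloatBits, &One, sizeof FloatBits);
      assert(byteAt(FloatBits, 2) == 0x80);

      // The phi GVN builds in Cont merges exactly these two forwarded bytes;
      // in the PRE test, the F path instead gets a newly inserted i8 load.
      return 0;
    }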