add support for forwarding mem intrinsic values to non-local loads.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90697 91177308-0d34-0410-b5e6-96231b3b80d8
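For context, a minimal sketch of the kind of IR this change lets GVN simplify, adapted from the new test added at the end of this diff (the function name @example is a placeholder, not part of the commit). Every predecessor of %Cont fully initializes the loaded bytes with @llvm.memset.i64, so the i16 load in the join block can be forwarded from the intrinsics and rewritten as a phi of splatted constants: a memset of byte 0x01 read back as an i16 gives 0x0101 = 257, and byte 0x02 gives 0x0202 = 514.

; Before GVN (sketch): each path to %Cont memsets the 400 bytes at %P.
define i16 @example(i16* %P, i1 %cond) {
  %P3 = bitcast i16* %P to i8*
  br i1 %cond, label %T, label %F
T:
  tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
  br label %Cont
F:
  tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
  br label %Cont
Cont:
  %P2 = getelementptr i16* %P, i32 4
  %A = load i16* %P2        ; after GVN: %A = phi i16 [ 514, %F ], [ 257, %T ]
  ret i16 %A
}

declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind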
@@ -1192,19 +1192,47 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
 struct AvailableValueInBlock {
   /// BB - The basic block in question.
   BasicBlock *BB;
+  enum ValType {
+    SimpleVal,  // A simple offsetted value that is accessed.
+    MemIntrin   // A memory intrinsic which is loaded from.
+  };
+
   /// V - The value that is live out of the block.
-  Value *V;
-  /// Offset - The byte offset in V that is interesting for the load query.
+  PointerIntPair<Value *, 1, ValType> Val;
+  /// Offset - The byte offset in Val that is interesting for the load query.
   unsigned Offset;
 
   static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                    unsigned Offset = 0) {
     AvailableValueInBlock Res;
     Res.BB = BB;
-    Res.V = V;
+    Res.Val.setPointer(V);
+    Res.Val.setInt(SimpleVal);
     Res.Offset = Offset;
     return Res;
   }
+
+  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+                                     unsigned Offset = 0) {
+    AvailableValueInBlock Res;
+    Res.BB = BB;
+    Res.Val.setPointer(MI);
+    Res.Val.setInt(MemIntrin);
+    Res.Offset = Offset;
+    return Res;
+  }
+
+  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+
+  Value *getSimpleValue() const {
+    assert(isSimpleValue() && "Wrong accessor");
+    return Val.getPointer();
+  }
+
+  MemIntrinsic *getMemIntrinValue() const {
+    assert(!isSimpleValue() && "Wrong accessor");
+    return cast<MemIntrinsic>(Val.getPointer());
+  }
 };
 
 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@@ -1221,30 +1249,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   const Type *LoadTy = LI->getType();
 
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
-    BasicBlock *BB = ValuesPerBlock[i].BB;
-    Value *AvailableVal = ValuesPerBlock[i].V;
-    unsigned Offset = ValuesPerBlock[i].Offset;
+    const AvailableValueInBlock &AV = ValuesPerBlock[i];
+    BasicBlock *BB = AV.BB;
 
     if (SSAUpdate.HasValueForBlock(BB))
       continue;
 
-    if (AvailableVal->getType() != LoadTy) {
-      assert(TD && "Need target data to handle type mismatch case");
-      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
-                                          BB->getTerminator(), *TD);
+    unsigned Offset = AV.Offset;
+    Value *AvailableVal;
+    if (AV.isSimpleValue()) {
+      AvailableVal = AV.getSimpleValue();
+      if (AvailableVal->getType() != LoadTy) {
+        assert(TD && "Need target data to handle type mismatch case");
+        AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+                                            BB->getTerminator(), *TD);
 
-      if (Offset) {
-        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-                     << *ValuesPerBlock[i].V << '\n'
+        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+                     << *AV.getSimpleValue() << '\n'
                      << *AvailableVal << '\n' << "\n\n\n");
       }
-
-
-      DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-                   << *ValuesPerBlock[i].V << '\n'
+    } else {
+      AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
+                                            LoadTy, BB->getTerminator(), *TD);
+      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+                   << " " << *AV.getMemIntrinValue() << '\n'
                    << *AvailableVal << '\n' << "\n\n\n");
     }
 
     SSAUpdate.AddAvailableValue(BB, AvailableVal);
   }
 
@@ -1324,19 +1355,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       }
     }
 
-#if 0
     // If the clobbering value is a memset/memcpy/memmove, see if we can
     // forward a value on from it.
-    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
      if (TD == 0)
        TD = getAnalysisIfAvailable<TargetData>();
      if (TD) {
-        int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
-        if (Offset != -1)
-          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+        int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
+        if (Offset != -1) {
+          ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+                                                                 Offset));
+          continue;
+        }
      }
     }
-#endif
 
     UnavailableBlocks.push_back(DepBB);
     continue;
@@ -1462,19 +1494,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   // to eliminate LI even if we insert uses in the other predecessors, we will
   // end up increasing code size. Reject this by scanning for LI.
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-    if (ValuesPerBlock[i].V == LI)
+    if (ValuesPerBlock[i].isSimpleValue() &&
+        ValuesPerBlock[i].getSimpleValue() == LI)
       return false;
 
+  // FIXME: It is extremely unclear what this loop is doing, other than
+  // artificially restricting loadpre.
   if (isSinglePred) {
     bool isHot = false;
-    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
+    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+      const AvailableValueInBlock &AV = ValuesPerBlock[i];
+      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
-        if (DT->dominates(LI, I)) {
-          isHot = true;
-          break;
-        }
+        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+          if (DT->dominates(LI, I)) {
+            isHot = true;
+            break;
+          }
+    }
 
     // We are interested only in "hot" instructions. We don't want to do any
     // mis-optimizations here.
@@ -163,6 +163,31 @@ entry:
 ; CHECK-NEXT: ret float
 }
 
+;; non-local memset -> i16 load forwarding.
+define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
+  %P3 = bitcast i16* %P to i8*
+  br i1 %cond, label %T, label %F
+T:
+  tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
+  br label %Cont
+
+F:
+  tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
+  br label %Cont
+
+Cont:
+  %P2 = getelementptr i16* %P, i32 4
+  %A = load i16* %P2
+  ret i16 %A
+
+; CHECK: @memset_to_i16_nonlocal0
+; CHECK: Cont:
+; CHECK-NEXT: %A = phi i16 [ 514, %F ], [ 257, %T ]
+; CHECK-NOT: load
+; CHECK: ret i16 %A
+}
+
+
 declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
 
 
@@ -192,6 +217,7 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
+
 ;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
 ;; bitcast equivalence can be properly phi translated.
 define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {