1
0
mirror of https://github.com/c64scene-ar/llvm-6502.git synced 2024-12-14 11:32:34 +00:00

add support for forwarding mem intrinsic values to non-local loads.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90697 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2009-12-06 04:54:31 +00:00
parent 0fce29b9d1
commit cb9cbc4949
2 changed files with 97 additions and 33 deletions
lib/Transforms/Scalar
test/Transforms/GVN

View File

@ -1192,19 +1192,47 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
struct AvailableValueInBlock {
/// BB - The basic block in question.
BasicBlock *BB;
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
MemIntrin // A memory intrinsic which is loaded from.
};
/// V - The value that is live out of the block.
Value *V;
/// Offset - The byte offset in V that is interesting for the load query.
PointerIntPair<Value *, 1, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
unsigned Offset = 0) {
AvailableValueInBlock Res;
Res.BB = BB;
Res.V = V;
Res.Val.setPointer(V);
Res.Val.setInt(SimpleVal);
Res.Offset = Offset;
return Res;
}
static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
unsigned Offset = 0) {
AvailableValueInBlock Res;
Res.BB = BB;
Res.Val.setPointer(MI);
Res.Val.setInt(MemIntrin);
Res.Offset = Offset;
return Res;
}
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
MemIntrinsic *getMemIntrinValue() const {
assert(!isSimpleValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
};
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@ -1221,30 +1249,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
const Type *LoadTy = LI->getType();
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
BasicBlock *BB = ValuesPerBlock[i].BB;
Value *AvailableVal = ValuesPerBlock[i].V;
unsigned Offset = ValuesPerBlock[i].Offset;
const AvailableValueInBlock &AV = ValuesPerBlock[i];
BasicBlock *BB = AV.BB;
if (SSAUpdate.HasValueForBlock(BB))
continue;
if (AvailableVal->getType() != LoadTy) {
assert(TD && "Need target data to handle type mismatch case");
AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
BB->getTerminator(), *TD);
if (Offset) {
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
<< *ValuesPerBlock[i].V << '\n'
unsigned Offset = AV.Offset;
Value *AvailableVal;
if (AV.isSimpleValue()) {
AvailableVal = AV.getSimpleValue();
if (AvailableVal->getType() != LoadTy) {
assert(TD && "Need target data to handle type mismatch case");
AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
BB->getTerminator(), *TD);
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *AV.getSimpleValue() << '\n'
<< *AvailableVal << '\n' << "\n\n\n");
}
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
<< *ValuesPerBlock[i].V << '\n'
} else {
AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
LoadTy, BB->getTerminator(), *TD);
DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *AV.getMemIntrinValue() << '\n'
<< *AvailableVal << '\n' << "\n\n\n");
}
SSAUpdate.AddAvailableValue(BB, AvailableVal);
}
@ -1324,19 +1355,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
}
#if 0
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
if (TD) {
int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
if (Offset != -1)
AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
continue;
}
}
}
#endif
UnavailableBlocks.push_back(DepBB);
continue;
@ -1462,19 +1494,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// to eliminate LI even if we insert uses in the other predecessors, we will
// end up increasing code size. Reject this by scanning for LI.
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
if (ValuesPerBlock[i].V == LI)
if (ValuesPerBlock[i].isSimpleValue() &&
ValuesPerBlock[i].getSimpleValue() == LI)
return false;
// FIXME: It is extremely unclear what this loop is doing, other than
// artificially restricting loadpre.
if (isSinglePred) {
bool isHot = false;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
if (AV.isSimpleValue())
// "Hot" Instruction is in some loop (because it dominates its dep.
// instruction).
if (DT->dominates(LI, I)) {
isHot = true;
break;
}
if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
if (DT->dominates(LI, I)) {
isHot = true;
break;
}
}
// We are interested only in "hot" instructions. We don't want to do any
// mis-optimizations here.

View File

@ -163,6 +163,31 @@ entry:
; CHECK-NEXT: ret float
}
;; non-local memset -> i16 load forwarding.
define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
%P3 = bitcast i16* %P to i8*
br i1 %cond, label %T, label %F
T:
tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
br label %Cont
F:
tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
br label %Cont
Cont:
%P2 = getelementptr i16* %P, i32 4
%A = load i16* %P2
ret i16 %A
; CHECK: @memset_to_i16_nonlocal0
; CHECK: Cont:
; CHECK-NEXT: %A = phi i16 [ 514, %F ], [ 257, %T ]
; CHECK-NOT: load
; CHECK: ret i16 %A
}
declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
@ -192,6 +217,7 @@ Cont:
; CHECK: ret i8 %A
}
;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
;; bitcast equivalence can be properly phi translated.
define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {