Enable non-local analysis and PRE for forwarding a large store to a smaller load.

This doesn't kick in very often yet because of phi translation issues,
but that can be resolved in the future.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82447 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2009-09-21 06:48:08 +00:00
parent 879135145f
commit 4fbd14e80e
2 changed files with 117 additions and 31 deletions

View File

@ -1026,7 +1026,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
/// be expressed as a base pointer plus a constant offset. Return the base and /// be expressed as a base pointer plus a constant offset. Return the base and
/// offset to the caller. /// offset to the caller.
static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const TargetData *TD) { const TargetData &TD) {
Operator *PtrOp = dyn_cast<Operator>(Ptr); Operator *PtrOp = dyn_cast<Operator>(Ptr);
if (PtrOp == 0) return Ptr; if (PtrOp == 0) return Ptr;
@ -1046,16 +1046,16 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
// Handle a struct and array indices which add their offset to the pointer. // Handle a struct and array indices which add their offset to the pointer.
if (const StructType *STy = dyn_cast<StructType>(*GTI)) { if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD->getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
} else { } else {
uint64_t Size = TD->getTypeAllocSize(GTI.getIndexedType()); uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
Offset += OpC->getSExtValue()*Size; Offset += OpC->getSExtValue()*Size;
} }
} }
// Re-sign extend from the pointer size if needed to get overflow edge cases // Re-sign extend from the pointer size if needed to get overflow edge cases
// right. // right.
unsigned PtrSize = TD->getPointerSizeInBits(); unsigned PtrSize = TD.getPointerSizeInBits();
if (PtrSize < 64) if (PtrSize < 64)
Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
@ -1071,12 +1071,12 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
/// give up, or a byte number in the stored value of the piece that feeds the /// give up, or a byte number in the stored value of the piece that feeds the
/// load. /// load.
static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
const TargetData *TD) { const TargetData &TD) {
int64_t StoreOffset = 0, LoadOffset = 0; int64_t StoreOffset = 0, LoadOffset = 0;
Value *StoreBase = Value *StoreBase =
GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
Value *LoadBase = Value *LoadBase =
GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
if (StoreBase != LoadBase) if (StoreBase != LoadBase)
return -1; return -1;
@ -1102,8 +1102,8 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
// must have gotten confused. // must have gotten confused.
// FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
// remove this check, as it is duplicated with what we have below. // remove this check, as it is duplicated with what we have below.
uint64_t StoreSize = TD->getTypeSizeInBits(DepSI->getOperand(0)->getType()); uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
uint64_t LoadSize = TD->getTypeSizeInBits(L->getType()); uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
if ((StoreSize & 7) | (LoadSize & 7)) if ((StoreSize & 7) | (LoadSize & 7))
return -1; return -1;
@ -1150,37 +1150,40 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
/// that the store *may* provide bits used by the load but we can't be sure /// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is /// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up. /// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, int Offset,const Type *LoadTy, static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Instruction *InsertPt, const TargetData *TD){ const Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext(); LLVMContext &Ctx = SrcVal->getType()->getContext();
uint64_t StoreSize = TD->getTypeSizeInBits(SrcVal->getType())/8; uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
uint64_t LoadSize = TD->getTypeSizeInBits(LoadTy)/8; uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
// Compute which bits of the stored value are being used by the load. Convert // Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with. // to an integer type to start with.
if (isa<PointerType>(SrcVal->getType())) if (isa<PointerType>(SrcVal->getType()))
SrcVal = new PtrToIntInst(SrcVal, TD->getIntPtrType(Ctx), "tmp", InsertPt); SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
if (!isa<IntegerType>(SrcVal->getType())) if (!isa<IntegerType>(SrcVal->getType()))
SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
"tmp", InsertPt); "tmp", InsertPt);
// Shift the bits to the least significant depending on endianness. // Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt; unsigned ShiftAmt;
if (TD->isLittleEndian()) { if (TD.isLittleEndian()) {
ShiftAmt = Offset*8; ShiftAmt = Offset*8;
} else { } else {
ShiftAmt = StoreSize-LoadSize-Offset; ShiftAmt = StoreSize-LoadSize-Offset;
} }
SrcVal = BinaryOperator::CreateLShr(SrcVal, if (ShiftAmt)
ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); SrcVal = BinaryOperator::CreateLShr(SrcVal,
ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), if (LoadSize != StoreSize)
"tmp", InsertPt); SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
"tmp", InsertPt);
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, *TD); return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
} }
struct AvailableValueInBlock { struct AvailableValueInBlock {
@ -1188,11 +1191,15 @@ struct AvailableValueInBlock {
BasicBlock *BB; BasicBlock *BB;
/// V - The value that is live out of the block. /// V - The value that is live out of the block.
Value *V; Value *V;
/// Offset - The byte offset in V that is interesting for the load query.
unsigned Offset;
static AvailableValueInBlock get(BasicBlock *BB, Value *V) { static AvailableValueInBlock get(BasicBlock *BB, Value *V,
unsigned Offset = 0) {
AvailableValueInBlock Res; AvailableValueInBlock Res;
Res.BB = BB; Res.BB = BB;
Res.V = V; Res.V = V;
Res.Offset = Offset;
return Res; return Res;
} }
}; };
@ -1209,14 +1216,23 @@ GetAvailableBlockValues(DenseMap<BasicBlock*, Value*> &BlockReplValues,
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
BasicBlock *BB = ValuesPerBlock[i].BB; BasicBlock *BB = ValuesPerBlock[i].BB;
Value *AvailableVal = ValuesPerBlock[i].V; Value *AvailableVal = ValuesPerBlock[i].V;
unsigned Offset = ValuesPerBlock[i].Offset;
Value *&BlockEntry = BlockReplValues[BB]; Value *&BlockEntry = BlockReplValues[BB];
if (BlockEntry) continue; if (BlockEntry) continue;
if (AvailableVal->getType() != LoadTy) { if (AvailableVal->getType() != LoadTy) {
assert(TD && "Need target data to handle type mismatch case"); assert(TD && "Need target data to handle type mismatch case");
AvailableVal = CoerceAvailableValueToLoadType(AvailableVal, LoadTy, AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
BB->getTerminator(), *TD); BB->getTerminator(), *TD);
if (Offset) {
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
<< *ValuesPerBlock[i].V << '\n'
<< *AvailableVal << '\n' << "\n\n\n");
}
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
<< *ValuesPerBlock[i].V << '\n' << *ValuesPerBlock[i].V << '\n'
<< *AvailableVal << '\n' << "\n\n\n"); << *AvailableVal << '\n' << "\n\n\n");
@ -1267,6 +1283,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
MemDepResult DepInfo = Deps[i].second; MemDepResult DepInfo = Deps[i].second;
if (DepInfo.isClobber()) { if (DepInfo.isClobber()) {
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
if (TD) {
int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getOperand(0),
Offset));
continue;
}
}
}
// FIXME: Handle memset/memcpy.
UnavailableBlocks.push_back(DepBB); UnavailableBlocks.push_back(DepBB);
continue; continue;
} }
@ -1299,8 +1333,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
S->getOperand(0))); S->getOperand(0)));
continue;
} else if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { }
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load. // If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) { if (LD->getType() != LI->getType()) {
if (TD == 0) if (TD == 0)
@ -1316,11 +1352,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
} }
} }
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
} else {
// FIXME: Handle memset/memcpy.
UnavailableBlocks.push_back(DepBB);
continue; continue;
} }
UnavailableBlocks.push_back(DepBB);
continue;
} }
// If we have no predecessors that produce a known value for this load, exit // If we have no predecessors that produce a known value for this load, exit
@ -1550,10 +1586,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// access code. // access code.
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, TD); int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
if (Offset != -1) { if (Offset != -1) {
Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
L->getType(), L, TD); L->getType(), L, *TD);
DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
<< *AvailVal << '\n' << *L << "\n\n\n"); << *AvailVal << '\n' << *L << "\n\n\n");

View File

@ -199,7 +199,7 @@ Cont:
;; types, and the reload is an offset from the store pointer. ;; types, and the reload is an offset from the store pointer.
;;===----------------------------------------------------------------------===;; ;;===----------------------------------------------------------------------===;;
;; i32 -> f32 forwarding. ;; i32 -> i8 forwarding.
;; PR4216 ;; PR4216
define i8 @coerce_offset0(i32 %V, i32* %P) { define i8 @coerce_offset0(i32 %V, i32* %P) {
store i32 %V, i32* %P store i32 %V, i32* %P
@ -214,5 +214,55 @@ define i8 @coerce_offset0(i32 %V, i32* %P) {
; CHECK: ret i8 ; CHECK: ret i8
} }
;; non-local i32/float -> i8 load forwarding.
;; Both predecessors of %Cont store a 32-bit value through %P (as i32 in %T,
;; as float via %P2 in %F); the i8 load in %Cont reads the byte at offset 2
;; from %P. The FileCheck directives at the end expect %A to become a phi in
;; %Cont with no load remaining before the return.
define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
; %P2 and %P3 are the same address as %P viewed as float* and i8*.
%P2 = bitcast i32* %P to float*
%P3 = bitcast i32* %P to i8*
; %P4 = %P + 2 bytes (i8-typed getelementptr with index 2).
%P4 = getelementptr i8* %P3, i32 2
br i1 %cond, label %T, label %F
T:
; Integer store covering bytes 0..3 at %P.
store i32 42, i32* %P
br label %Cont
F:
; Float store to the same address through the float* view.
store float 1.0, float* %P2
br label %Cont
Cont:
; Narrow load at byte offset 2 inside the value stored on either path.
%A = load i8* %P4
ret i8 %A
; CHECK: @coerce_offset_nonlocal0
; CHECK: Cont:
; CHECK: %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}
;; non-local i32 -> i8 partial redundancy load forwarding.
;; Only the %T predecessor stores through %P; %F reaches %Cont without a
;; store, so the i8 load in %Cont is available on one path only. The
;; FileCheck directives at the end expect a load of %P4 to appear in %F and
;; %A to become a phi in %Cont with no load remaining before the return.
define i8 @coerce_offset_pre0(i32* %P, i1 %cond) {
%P3 = bitcast i32* %P to i8*
; %P4 = %P + 2 bytes (i8-typed getelementptr with index 2).
%P4 = getelementptr i8* %P3, i32 2
br i1 %cond, label %T, label %F
T:
; Integer store covering bytes 0..3 at %P.
store i32 42, i32* %P
br label %Cont
F:
; No store on this path.
br label %Cont
Cont:
; Narrow load at byte offset 2 from %P.
%A = load i8* %P4
ret i8 %A
; CHECK: @coerce_offset_pre0
; CHECK: F:
; CHECK: load i8* %P4
; CHECK: Cont:
; CHECK: %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}