From cb9cbc494975dc827465af32d806e7b9ae17090c Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Sun, 6 Dec 2009 04:54:31 +0000
Subject: [PATCH] add support for forwarding mem intrinsic values to non-local loads.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90697 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/GVN.cpp | 104 +++++++++++++++++++++++-----------
 test/Transforms/GVN/rle.ll    |  26 +++++++++
 2 files changed, 97 insertions(+), 33 deletions(-)

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index d485e07e076..7454f62b388 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1192,19 +1192,47 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
 struct AvailableValueInBlock {
   /// BB - The basic block in question.
   BasicBlock *BB;
+  enum ValType {
+    SimpleVal,  // A simple offsetted value that is accessed.
+    MemIntrin   // A memory intrinsic which is loaded from.
+  };
+
   /// V - The value that is live out of the block.
-  Value *V;
-  /// Offset - The byte offset in V that is interesting for the load query.
+  PointerIntPair<Value *, 1, ValType> Val;
+
+  /// Offset - The byte offset in Val that is interesting for the load query.
   unsigned Offset;
 
   static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                    unsigned Offset = 0) {
     AvailableValueInBlock Res;
     Res.BB = BB;
-    Res.V = V;
+    Res.Val.setPointer(V);
+    Res.Val.setInt(SimpleVal);
     Res.Offset = Offset;
     return Res;
   }
+
+  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+                                     unsigned Offset = 0) {
+    AvailableValueInBlock Res;
+    Res.BB = BB;
+    Res.Val.setPointer(MI);
+    Res.Val.setInt(MemIntrin);
+    Res.Offset = Offset;
+    return Res;
+  }
+
+  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+  Value *getSimpleValue() const {
+    assert(isSimpleValue() && "Wrong accessor");
+    return Val.getPointer();
+  }
+
+  MemIntrinsic *getMemIntrinValue() const {
+    assert(!isSimpleValue() && "Wrong accessor");
+    return cast<MemIntrinsic>(Val.getPointer());
+  }
 };
 
 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@@ -1221,30 +1249,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   const Type *LoadTy = LI->getType();
 
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
-    BasicBlock *BB = ValuesPerBlock[i].BB;
-    Value *AvailableVal = ValuesPerBlock[i].V;
-    unsigned Offset = ValuesPerBlock[i].Offset;
+    const AvailableValueInBlock &AV = ValuesPerBlock[i];
+    BasicBlock *BB = AV.BB;
 
     if (SSAUpdate.HasValueForBlock(BB))
       continue;
-
-    if (AvailableVal->getType() != LoadTy) {
-      assert(TD && "Need target data to handle type mismatch case");
-      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
-                                          BB->getTerminator(), *TD);
-
-      if (Offset) {
-        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-              << *ValuesPerBlock[i].V << '\n'
+
+    unsigned Offset = AV.Offset;
+
+    Value *AvailableVal;
+    if (AV.isSimpleValue()) {
+      AvailableVal = AV.getSimpleValue();
+      if (AvailableVal->getType() != LoadTy) {
+        assert(TD && "Need target data to handle type mismatch case");
+        AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+                                            BB->getTerminator(), *TD);
+
+        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+              << *AV.getSimpleValue() << '\n'
              << *AvailableVal << '\n' << "\n\n\n");
       }
-
-
-      DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-            << *ValuesPerBlock[i].V << '\n'
+    } else {
+      AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
+                                            LoadTy, BB->getTerminator(), *TD);
+      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+                   << " " << *AV.getMemIntrinValue() << '\n'
             << *AvailableVal << '\n' << "\n\n\n");
     }
-
     SSAUpdate.AddAvailableValue(BB, AvailableVal);
   }
 
@@ -1324,19 +1355,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       }
     }
 
-#if 0
     // If the clobbering value is a memset/memcpy/memmove, see if we can
     // forward a value on from it.
-    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
       if (TD == 0)
        TD = getAnalysisIfAvailable<TargetData>();
       if (TD) {
-        int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
-        if (Offset != -1)
-          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+        int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
+        if (Offset != -1) {
+          ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+                                                                Offset));
+          continue;
+        }
       }
     }
-#endif
 
     UnavailableBlocks.push_back(DepBB);
     continue;
@@ -1462,19 +1494,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   // to eliminate LI even if we insert uses in the other predecessors, we will
   // end up increasing code size.  Reject this by scanning for LI.
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-    if (ValuesPerBlock[i].V == LI)
+    if (ValuesPerBlock[i].isSimpleValue() &&
+        ValuesPerBlock[i].getSimpleValue() == LI)
       return false;
 
+  // FIXME: It is extremely unclear what this loop is doing, other than
+  // artificially restricting loadpre.
   if (isSinglePred) {
     bool isHot = false;
-    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
+    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+      const AvailableValueInBlock &AV = ValuesPerBlock[i];
+      if (AV.isSimpleValue())
         // "Hot" Instruction is in some loop (because it dominates its dep.
         // instruction).
-        if (DT->dominates(LI, I)) {
-          isHot = true;
-          break;
-        }
+        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+          if (DT->dominates(LI, I)) {
+            isHot = true;
+            break;
+          }
+    }
 
     // We are interested only in "hot" instructions. We don't want to do any
     // mis-optimizations here.
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 01d1ebc1a98..af025570b3c 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -163,6 +163,31 @@ entry:
 ; CHECK-NEXT: ret float
 }
 
+;; non-local memset -> i16 load forwarding.
+define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
+  %P3 = bitcast i16* %P to i8*
+  br i1 %cond, label %T, label %F
+T:
+  tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
+  br label %Cont
+
+F:
+  tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
+  br label %Cont
+
+Cont:
+  %P2 = getelementptr i16* %P, i32 4
+  %A = load i16* %P2
+  ret i16 %A
+
+; CHECK: @memset_to_i16_nonlocal0
+; CHECK: Cont:
+; CHECK-NEXT:   %A = phi i16 [ 514, %F ], [ 257, %T ]
+; CHECK-NOT: load
+; CHECK: ret i16 %A
+}
+
+
 declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
 
 
@@ -192,6 +217,7 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
+
 ;; non-local i32/float -> i8 load forwarding.  This also tests that the "P3"
 ;; bitcast equivalence can be properly phi translated.
 define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {