make memset inference significantly more powerful: it can now handle
memsets that initialize "structs of arrays" and other store sequences
that are not sequential.  This is still only enabled if you pass
-form-memset-from-stores.  The flag is not heavily tested and I haven't
analyzed the perf regressions when -form-memset-from-stores is passed
either, but this causes no make check regressions.
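
As an illustration of the kind of store sequence this means (this example is editorial, not part of the patch, and the SoA/zero names are made up): zeroing a struct of arrays one element at a time produces stores whose byte offsets are out of order, yet together they cover one contiguous block that a single memset can fill. Assuming 4-byte ints, the stores below hit offsets 0, 8, 4, 12, covering bytes [0, 16):

// Hypothetical C++ source pattern, for illustration only.
struct SoA {
  int a[2];
  int b[2];
};

void zero(SoA &S) {
  S.a[0] = 0;   // byte offset 0
  S.b[0] = 0;   // byte offset 8
  S.a[1] = 0;   // byte offset 4
  S.b[1] = 0;   // byte offset 12
}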


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48909 91177308-0d34-0410-b5e6-96231b3b80d8
Chris Lattner 2008-03-28 06:45:13 +00:00
parent 02bee85a70
commit 7f0965e7da


@@ -36,6 +36,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include <list>
using namespace llvm;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -1074,11 +1075,11 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
return Offset;
}
/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
/// plus the specified constant offset. For example, Ptr1 might be &A[42], and
/// Ptr2 might be &A[40] and Offset might be 8.
static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
TargetData &TD) {
/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
TargetData &TD) {
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
@@ -1100,10 +1101,122 @@ static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
return Offset1 == Offset2+(int64_t)Offset;
Offset = Offset2-Offset1;
return true;
}
/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
/// store 0 -> P+1
/// store 0 -> P+0
/// store 0 -> P+3
/// store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc. When we see
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
/// two ranges into [0, 4), which is memset'able.
namespace {
struct MemsetRange {
// Start/End - A half-open range that describes the span that this range covers.
// The range is closed at the start and open at the end: [Start, End).
int64_t Start, End;
/// StartPtr - The getelementptr instruction that points to the start of the
/// range.
Value *StartPtr;
/// Alignment - The known alignment of the first store.
unsigned Alignment;
/// TheStores - The actual stores that make up this range.
SmallVector<StoreInst*, 16> TheStores;
};
class MemsetRanges {
/// Ranges - A sorted list of the memset ranges. We use std::list here
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
TargetData &TD;
public:
MemsetRanges(TargetData &td) : TD(td) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
void addStore(int64_t OffsetFromFirst, StoreInst *SI);
};
}
/// addStore - Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
// Do a linear search of the ranges to see if this can be joined and/or to
// find the insertion point in the list. We keep the ranges sorted for
// simplicity here. This is a linear search of a linked list, which is ugly,
// however the number of ranges is limited, so this won't get crazy slow.
range_iterator I = Ranges.begin(), E = Ranges.end();
while (I != E && Start > I->End)
++I;
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
// to insert a new range. Handle this now.
if (I == E || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
R.StartPtr = SI->getPointerOperand();
R.Alignment = SI->getAlignment();
R.TheStores.push_back(SI);
return;
}
// This store overlaps with I, add it.
I->TheStores.push_back(SI);
// At this point, we may have an interval that completely contains our store.
// If so, just add it to the interval and return.
if (I->Start <= Start && I->End >= End)
return;
// Now we know that Start <= I->End and End >= I->Start so the range overlaps
// but is not entirely contained within the range.
// See if this store extends the start of the range. In this case, it couldn't
// possibly cause it to join the prior range, because otherwise we would have
// stopped on *it*.
if (Start < I->Start)
I->Start = Start;
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
// is in or right at the end of I), and that End >= I->Start. Extend I out to
// End.
if (End > I->End) {
I->End = End;
range_iterator NextI = I;
while (++NextI != E && End >= NextI->Start) {
// Merge the range in.
I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
if (NextI->End > I->End)
I->End = NextI->End;
Ranges.erase(NextI);
NextI = I;
}
}
}
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones
@@ -1125,18 +1238,15 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
TargetData &TD = getAnalysis<TargetData>();
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
// Okay, so we now have a single store that can be splattable. Try to 'grow'
// this store by looking for neighboring stores to the immediate left or right
// of the store we have so far. While we could in theory handle stores in
// this order: A[0], A[2], A[1]
// in practice, right now we only worry about cases where stores are
// consecutive in increasing or decreasing address order.
uint64_t BytesSoFar = TD.getTypeStoreSize(SI->getOperand(0)->getType());
uint64_t BytesFromSI = 0;
unsigned StartAlign = SI->getAlignment();
// Okay, so we now have a single store that can be splattable. Scan to find
// all subsequent stores of the same value at constant offsets from the same
// pointer. Join these together into ranges, so we can decide whether
// contiguous blocks are stored.
MemsetRanges Ranges(TD);
// Add our first pointer.
Ranges.addStore(0, SI);
Value *StartPtr = SI->getPointerOperand();
SmallVector<StoreInst*, 16> Stores;
Stores.push_back(SI);
BasicBlock::iterator BI = SI;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -1164,77 +1274,69 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
break;
Value *ThisPointer = NextStore->getPointerOperand();
unsigned AccessSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
// If so, check to see if the store is before the current range or after it
// in either case, extend the range, otherwise reject it.
if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) {
// Okay, this extends the stored area on the end, just add to the bytes
// so far and remember this store.
BytesSoFar += AccessSize;
Stores.push_back(NextStore);
continue;
}
if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) {
// Okay, the store is before the current range. Reset our start pointer
// and get new alignment info etc.
BytesSoFar += AccessSize;
BytesFromSI += AccessSize;
Stores.push_back(NextStore);
StartPtr = ThisPointer;
StartAlign = NextStore->getAlignment();
continue;
}
// Otherwise, this store wasn't contiguous with our current range, bail out.
break;
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
break;
Ranges.addStore(Offset, NextStore);
}
// If we found less than 4 stores to merge, bail out, it isn't worth losing
// type information in llvm IR to do the transformation.
if (Stores.size() < 4)
return false;
Function *MemSetF = 0;
// Otherwise, we do want to transform this! Create a new memset. We put the
// memset right after the first store that we found in this block. This
// ensures that the caller will increment the iterator to the memset before
// it deletes all the stores.
BasicBlock::iterator InsertPt = SI; ++InsertPt;
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
bool MadeChange = false;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
// If we found less than 4 stores to merge, ignore the subrange: it isn't
// worth losing type information in llvm IR to do the transformation.
if (Range.TheStores.size() < 4)
continue;
// Otherwise, we do want to transform this! Create a new memset. We put
// the memset right after the first store that we found in this block. This
// ensures that the caller will increment the iterator to the memset before
// it deletes all the stores.
BasicBlock::iterator InsertPt = SI; ++InsertPt;
Function *F = Intrinsic::getDeclaration(SI->getParent()->getParent()
if (MemSetF == 0)
MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
->getParent(), Intrinsic::memset_i64);
// StartPtr may not dominate the starting point. Instead of using it, base
// the destination pointer off the input to the first store in the block.
StartPtr = SI->getPointerOperand();
// Cast the start ptr to be i8* as memset requires.
const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
if (StartPtr->getType() != i8Ptr)
StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
InsertPt);
// Offset the pointer if needed.
if (Range.Start)
StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
Range.Start),
"ptroffset", InsertPt);
// Offset the pointer if needed.
if (BytesFromSI)
StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
-BytesFromSI),
"ptroffset", InsertPt);
Value *Ops[] = {
StartPtr, ByteVal, // Start, value
ConstantInt::get(Type::Int64Ty, Range.End-Range.Start), // size
ConstantInt::get(Type::Int32Ty, Range.Alignment) // align
};
new CallInst(MemSetF, Ops, Ops+4, "", InsertPt);
Value *Ops[] = {
StartPtr, ByteVal, // Start, value
ConstantInt::get(Type::Int64Ty, BytesSoFar), // size
ConstantInt::get(Type::Int32Ty, StartAlign) // align
};
new CallInst(F, Ops, Ops+4, "", InsertPt);
// Zap all the stores.
toErase.append(Range.TheStores.begin(), Range.TheStores.end());
++NumMemSetInfer;
MadeChange = true;
}
// Zap all the stores.
toErase.append(Stores.begin(), Stores.end());
++NumMemSetInfer;
return true;
return MadeChange;
}
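
To make the range-joining strategy easier to follow outside of the GVN plumbing, here is a standalone sketch that mirrors the logic of MemsetRanges::addStore on plain offsets and sizes. It is an editorial illustration under simplifying assumptions; the Range/Ranges/addStore/dump names are stand-ins and none of this code is from the patch.

// Simplified model of the interval merging done by MemsetRanges::addStore.
// Stores arrive in arbitrary order; each either starts a new half-open
// [Start, End) range or is folded into an existing one, absorbing any
// later ranges it now bridges.
#include <cstdint>
#include <cstdio>
#include <list>

struct Range { int64_t Start, End; };   // half-open: [Start, End)

class Ranges {
  std::list<Range> List;                // kept sorted by Start
public:
  void addStore(int64_t Start, int64_t Size) {
    int64_t End = Start + Size;
    std::list<Range>::iterator I = List.begin(), E = List.end();
    // Walk to the first range whose end reaches this store.
    while (I != E && Start > I->End)
      ++I;
    // No overlap or adjacency: start a brand-new range.
    if (I == E || End < I->Start) {
      List.insert(I, Range{Start, End});
      return;
    }
    // Overlaps or touches *I: grow its start and/or end as needed.
    if (Start < I->Start)
      I->Start = Start;
    if (End > I->End) {
      I->End = End;
      // Merge in any following ranges that the enlarged range now reaches.
      std::list<Range>::iterator Next = I;
      while (++Next != E && End >= Next->Start) {
        if (Next->End > I->End)
          I->End = Next->End;
        List.erase(Next);
        Next = I;
      }
    }
  }
  void dump() const {
    for (const Range &R : List)
      std::printf("[%lld, %lld)\n", (long long)R.Start, (long long)R.End);
  }
};

int main() {
  Ranges R;
  // The example from the MemsetRange comment: one-byte stores to P+1, P+0,
  // P+3, P+2, in that order.
  R.addStore(1, 1);
  R.addStore(0, 1);
  R.addStore(3, 1);
  R.addStore(2, 1);
  R.dump();   // prints a single mergeable range: [0, 4)
}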