Make memset inference significantly more powerful: it can now handle memsets
that initialize "structs of arrays" and other store sequences that are not
sequential.

This is still only enabled if you pass -form-memset-from-stores. The flag is
not heavily tested and I haven't analyzed the perf regressions when
-form-memset-from-stores is passed either, but this causes no "make check"
regressions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48909 91177308-0d34-0410-b5e6-96231b3b80d8
commit 7f0965e7da
parent 02bee85a70
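To make the commit message concrete, here is an illustrative example (not taken from this commit) of the kind of "struct of arrays" initialization the new range-based inference can handle. The struct, its field names, and the 4-byte int assumption are invented for the sketch; the point is only that the stores are contiguous as a whole but are not emitted in increasing or decreasing address order, which is exactly the case the old left/right-growing code gave up on.

// Hypothetical example, assuming 4-byte int.  Written field-by-field, the
// stores hit byte offsets 0, 8, 4, 12: contiguous overall, but not in
// increasing or decreasing address order.
struct SoA {
  int a[2];   // bytes [0, 8)
  int b[2];   // bytes [8, 16)
};

void zero(SoA &s) {
  s.a[0] = 0;   // offset 0
  s.b[0] = 0;   // offset 8
  s.a[1] = 0;   // offset 4
  s.b[1] = 0;   // offset 12
  // With -form-memset-from-stores, the four stores can be joined into a single
  // [0, 16) range, which is big enough (>= 4 stores) to become one memset.
}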
@@ -36,6 +36,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Target/TargetData.h"
+#include <list>
 using namespace llvm;

 STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -1074,11 +1075,11 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
   return Offset;
 }

-/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
-/// plus the specified constant offset.  For example, Ptr1 might be &A[42], and
-/// Ptr2 might be &A[40] and Offset might be 8.
-static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
-                              TargetData &TD) {
+/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
+/// constant offset, and return that constant offset.  For example, Ptr1 might
+/// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8.
+static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
+                            TargetData &TD) {
   // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
   // base.  After that base, they may have some number of common (and
   // potentially variable) indices.  After that they handle some constant
@@ -1100,10 +1101,122 @@ static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
     int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
     if (VariableIdxFound) return false;

-  return Offset1 == Offset2+(int64_t)Offset;
+  Offset = Offset2-Offset1;
+  return true;
 }


+/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// This allows us to analyze stores like:
+///   store 0 -> P+1
+///   store 0 -> P+0
+///   store 0 -> P+3
+///   store 0 -> P+2
+/// which sometimes happens with stores to arrays of structs etc.  When we see
+/// the first store, we make a range [1, 2).  The second store extends the range
+/// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the
+/// two ranges into [0, 3) which is memset'able.
+namespace {
+struct MemsetRange {
+  // Start/End - A semi range that describes the span that this range covers.
+  // The range is closed at the start and open at the end: [Start, End).
+  int64_t Start, End;
+
+  /// StartPtr - The getelementptr instruction that points to the start of the
+  /// range.
+  Value *StartPtr;
+
+  /// Alignment - The known alignment of the first store.
+  unsigned Alignment;
+
+  /// TheStores - The actual stores that make up this range.
+  SmallVector<StoreInst*, 16> TheStores;
+};
+
+
+class MemsetRanges {
+  /// Ranges - A sorted list of the memset ranges.  We use std::list here
+  /// because each element is relatively large and expensive to copy.
+  std::list<MemsetRange> Ranges;
+  typedef std::list<MemsetRange>::iterator range_iterator;
+  TargetData &TD;
+public:
+  MemsetRanges(TargetData &td) : TD(td) {}
+
+  typedef std::list<MemsetRange>::const_iterator const_iterator;
+  const_iterator begin() const { return Ranges.begin(); }
+  const_iterator end() const { return Ranges.end(); }
+
+
+  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+};
+
+}
+
+/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
+void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
+  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+  // Do a linear search of the ranges to see if this can be joined and/or to
+  // find the insertion point in the list.  We keep the ranges sorted for
+  // simplicity here.  This is a linear search of a linked list, which is ugly,
+  // however the number of ranges is limited, so this won't get crazy slow.
+  range_iterator I = Ranges.begin(), E = Ranges.end();
+
+  while (I != E && Start > I->End)
+    ++I;
+
+  // We now know that I == E, in which case we didn't find anything to merge
+  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
+  // to insert a new range.  Handle this now.
+  if (I == E || End < I->Start) {
+    MemsetRange &R = *Ranges.insert(I, MemsetRange());
+    R.Start        = Start;
+    R.End          = End;
+    R.StartPtr     = SI->getPointerOperand();
+    R.Alignment    = SI->getAlignment();
+    R.TheStores.push_back(SI);
+    return;
+  }
+
+  // This store overlaps with I, add it.
+  I->TheStores.push_back(SI);
+
+  // At this point, we may have an interval that completely contains our store.
+  // If so, just add it to the interval and return.
+  if (I->Start <= Start && I->End >= End)
+    return;
+
+  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
+  // but is not entirely contained within the range.
+
+  // See if the range extends the start of the range.  In this case, it couldn't
+  // possibly cause it to join the prior range, because otherwise we would have
+  // stopped on *it*.
+  if (Start < I->Start)
+    I->Start = Start;
+
+  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
+  // is in or right at the end of I), and that End >= I->Start.  Extend I out to
+  // End.
+  if (End > I->End) {
+    I->End = End;
+    range_iterator NextI = I;;
+    while (++NextI != E && End >= NextI->Start) {
+      // Merge the range in.
+      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+      if (NextI->End > I->End)
+        I->End = NextI->End;
+      Ranges.erase(NextI);
+      NextI = I;
+    }
+  }
+}
+
+
+
 /// processStore - When GVN is scanning forward over instructions, we look for
 /// some other patterns to fold away.  In particular, this looks for stores to
 /// neighboring locations of memory.  If it sees enough consequtive ones
@@ -1125,18 +1238,15 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
   TargetData &TD = getAnalysis<TargetData>();
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

-  // Okay, so we now have a single store that can be splatable.  Try to 'grow'
-  // this store by looking for neighboring stores to the immediate left or right
-  // of the store we have so far.  While we could in theory handle stores in
-  // this order: A[0], A[2], A[1]
-  // in practice, right now we only worry about cases where stores are
-  // consequtive in increasing or decreasing address order.
-  uint64_t BytesSoFar = TD.getTypeStoreSize(SI->getOperand(0)->getType());
-  uint64_t BytesFromSI = 0;
-  unsigned StartAlign = SI->getAlignment();
+  // Okay, so we now have a single store that can be splatable.  Scan to find
+  // all subsequent stores of the same value to offset from the same pointer.
+  // Join these together into ranges, so we can decide whether contiguous blocks
+  // are stored.
+  MemsetRanges Ranges(TD);
+
+  // Add our first pointer.
+  Ranges.addStore(0, SI);
   Value *StartPtr = SI->getPointerOperand();
-  SmallVector<StoreInst*, 16> Stores;
-  Stores.push_back(SI);

   BasicBlock::iterator BI = SI;
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -1164,77 +1274,69 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
     // Check to see if this stored value is of the same byte-splattable value.
     if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
       break;

-    Value *ThisPointer = NextStore->getPointerOperand();
-    unsigned AccessSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
-    // If so, check to see if the store is before the current range or after it
-    // in either case, extend the range, otherwise reject it.
-    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) {
-      // Okay, this extends the stored area on the end, just add to the bytes
-      // so far and remember this store.
-      BytesSoFar += AccessSize;
-      Stores.push_back(NextStore);
-      continue;
-    }
-
-    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) {
-      // Okay, the store is before the current range.  Reset our start pointer
-      // and get new alignment info etc.
-      BytesSoFar  += AccessSize;
-      BytesFromSI += AccessSize;
-      Stores.push_back(NextStore);
-      StartPtr = ThisPointer;
-      StartAlign = NextStore->getAlignment();
-      continue;
-    }
-
-    // Otherwise, this store wasn't contiguous with our current range, bail out.
-    break;
+    // Check to see if this store is to a constant offset from the start ptr.
+    int64_t Offset;
+    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
+      break;
+
+    Ranges.addStore(Offset, NextStore);
   }

-  // If we found less than 4 stores to merge, bail out, it isn't worth losing
-  // type information in llvm IR to do the transformation.
-  if (Stores.size() < 4)
-    return false;
-
-  // Otherwise, we do want to transform this!  Create a new memset.  We put the
-  // memset right after the first store that we found in this block.  This
-  // ensures that the caller will increment the iterator to the memset before
-  // it deletes all the stores.
-  BasicBlock::iterator InsertPt = SI; ++InsertPt;
-
-  Function *F = Intrinsic::getDeclaration(SI->getParent()->getParent()
+  Function *MemSetF = 0;
+
+  // Now that we have full information about ranges, loop over the ranges and
+  // emit memset's for anything big enough to be worthwhile.
+  bool MadeChange = false;
+  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+       I != E; ++I) {
+    const MemsetRange &Range = *I;
+
+    // If we found less than 4 stores to merge, ignore the subrange: it isn't
+    // worth losing type information in llvm IR to do the transformation.
+    if (Range.TheStores.size() < 4)
+      continue;
+
+    // Otherwise, we do want to transform this!  Create a new memset.  We put
+    // the memset right after the first store that we found in this block. This
+    // ensures that the caller will increment the iterator to the memset before
+    // it deletes all the stores.
+    BasicBlock::iterator InsertPt = SI; ++InsertPt;
+
+    if (MemSetF == 0)
+      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
                                           ->getParent(), Intrinsic::memset_i64);
-
-  // StartPtr may not dominate the starting point.  Instead of using it, base
-  // the destination pointer off the input to the first store in the block.
-  StartPtr = SI->getPointerOperand();
-
-  // Cast the start ptr to be i8* as memset requires.
-  const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
-  if (StartPtr->getType() != i8Ptr)
-    StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
-                               InsertPt);
-
-  // Offset the pointer if needed.
-  if (BytesFromSI)
-    StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
-                                                                -BytesFromSI),
-                                     "ptroffset", InsertPt);
-
-  Value *Ops[] = {
-    StartPtr, ByteVal,   // Start, value
-    ConstantInt::get(Type::Int64Ty, BytesSoFar),  // size
-    ConstantInt::get(Type::Int32Ty, StartAlign)  // align
-  };
-  new CallInst(F, Ops, Ops+4, "", InsertPt);
-
-  // Zap all the stores.
-  toErase.append(Stores.begin(), Stores.end());
-
-  ++NumMemSetInfer;
-  return true;
+
+    // StartPtr may not dominate the starting point.  Instead of using it, base
+    // the destination pointer off the input to the first store in the block.
+    StartPtr = SI->getPointerOperand();
+
+    // Cast the start ptr to be i8* as memset requires.
+    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+    if (StartPtr->getType() != i8Ptr)
+      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+                                 InsertPt);
+
+    // Offset the pointer if needed.
+    if (Range.Start)
+      StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
+                                                                  Range.Start),
+                                       "ptroffset", InsertPt);
+
+    Value *Ops[] = {
+      StartPtr, ByteVal,   // Start, value
+      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),  // size
+      ConstantInt::get(Type::Int32Ty, Range.Alignment)   // align
+    };
+    new CallInst(MemSetF, Ops, Ops+4, "", InsertPt);
+
+    // Zap all the stores.
+    toErase.append(Range.TheStores.begin(), Range.TheStores.end());
+    ++NumMemSetInfer;
+    MadeChange = true;
+  }
+
+  return MadeChange;
 }
