From 7f0965e7daa17f0c5987fb923cab94ed7f6f4d78 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Fri, 28 Mar 2008 06:45:13 +0000
Subject: [PATCH] make memset inference significantly more powerful: it can now
 handle memsets that initialize "structs of arrays" and other store sequences
 that are not sequential.  This is still only enabled if you pass
 -form-memset-from-stores.  The flag is not heavily tested and I haven't
 analyzed the perf regressions when -form-memset-from-stores is passed either,
 but this causes no make check regressions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48909 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/GVN.cpp | 256 ++++++++++++++++++++++++----------
 1 file changed, 179 insertions(+), 77 deletions(-)

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a9445dd8530..ba6e361ecbb 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Target/TargetData.h"
+#include <list>
 using namespace llvm;
 
 STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -1074,11 +1075,11 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
   return Offset;
 }
 
-/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
-/// plus the specified constant offset.  For example, Ptr1 might be &A[42], and
-/// Ptr2 might be &A[40] and Offset might be 8.
-static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
-                              TargetData &TD) {
+/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
+/// constant offset, and return that constant offset.  For example, Ptr1 might
+/// be &A[42], and Ptr2 might be &A[40].  In this case Offset would be -8.
+static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
+                            TargetData &TD) {
   // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
   // base.  After that base, they may have some number of common (and
   // potentially variable) indices.  After that they handle some constant
@@ -1100,10 +1101,122 @@ static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
   int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
   if (VariableIdxFound) return false;
   
-  return Offset1 == Offset2+(int64_t)Offset;
+  Offset = Offset2-Offset1;
+  return true;
 }
 
+/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// This allows us to analyze stores like:
+///   store 0 -> P+1
+///   store 0 -> P+0
+///   store 0 -> P+3
+///   store 0 -> P+2
+/// which sometimes happens with stores to arrays of structs etc.  When we see
+/// the first store, we make a range [1, 2).  The second store extends the range
+/// to [0, 2).  The third makes a new range [3, 4).  The fourth store joins the
+/// two ranges into [0, 4), which is memset'able.
+namespace {
+struct MemsetRange {
+  // Start/End - A semi-open range that describes the span this range covers.
+  // The range is closed at the start and open at the end: [Start, End).
+  int64_t Start, End;
+
+  /// StartPtr - The getelementptr instruction that points to the start of the
+  /// range.
+  Value *StartPtr;
+
+  /// Alignment - The known alignment of the first store.
+  unsigned Alignment;
+
+  /// TheStores - The actual stores that make up this range.
+  SmallVector<StoreInst*, 16> TheStores;
+};
+
+
+class MemsetRanges {
+  /// Ranges - A sorted list of the memset ranges.  We use std::list here
+  /// because each element is relatively large and expensive to copy.
+  std::list<MemsetRange> Ranges;
+  typedef std::list<MemsetRange>::iterator range_iterator;
+  TargetData &TD;
+public:
+  MemsetRanges(TargetData &td) : TD(td) {}
+
+  typedef std::list<MemsetRange>::const_iterator const_iterator;
+  const_iterator begin() const { return Ranges.begin(); }
+  const_iterator end() const { return Ranges.end(); }
+
+
+  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+};
+
+}
+
+/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
+void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
+  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+  // Do a linear search of the ranges to see if this can be joined and/or to
+  // find the insertion point in the list.  We keep the ranges sorted for
+  // simplicity here.  This is a linear search of a linked list, which is ugly,
+  // however the number of ranges is limited, so this won't get crazy slow.
+  range_iterator I = Ranges.begin(), E = Ranges.end();
+
+  while (I != E && Start > I->End)
+    ++I;
+
+  // We now know that I == E, in which case we didn't find anything to merge
+  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
+  // to insert a new range.  Handle this now.
+  if (I == E || End < I->Start) {
+    MemsetRange &R = *Ranges.insert(I, MemsetRange());
+    R.Start     = Start;
+    R.End       = End;
+    R.StartPtr  = SI->getPointerOperand();
+    R.Alignment = SI->getAlignment();
+    R.TheStores.push_back(SI);
+    return;
+  }
+
+  // This store overlaps with I, add it.
+  I->TheStores.push_back(SI);
+
+  // At this point, we may have an interval that completely contains our store.
+  // If so, just add it to the interval and return.
+  if (I->Start <= Start && I->End >= End)
+    return;
+
+  // Now we know that Start <= I->End and End >= I->Start, so the store
+  // overlaps I but is not entirely contained within it.
+
+  // See if this store extends the start of the range.  If so, it couldn't
+  // possibly cause I to join the prior range, because otherwise we would have
+  // stopped on *it*.
+  if (Start < I->Start)
+    I->Start = Start;
+
+  // Now we know that Start <= I->End and Start >= I->Start (so the start point
+  // is in or right at the end of I), and that End >= I->Start.  Extend I out
+  // to End, merging in any later ranges we now reach.
+  if (End > I->End) {
+    I->End = End;
+    range_iterator NextI = I;
+    while (++NextI != E && End >= NextI->Start) {
+      // Merge the range in.
+      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+      if (NextI->End > I->End)
+        I->End = NextI->End;
+      Ranges.erase(NextI);
+      NextI = I;
+    }
+  }
+}
+
+
+
 /// processStore - When GVN is scanning forward over instructions, we look for
 /// some other patterns to fold away.  In particular, this looks for stores to
 /// neighboring locations of memory.  If it sees enough consequtive ones
@@ -1125,18 +1238,15 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
   TargetData &TD = getAnalysis<TargetData>();
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   
-  // Okay, so we now have a single store that can be splatable.  Try to 'grow'
-  // this store by looking for neighboring stores to the immediate left or right
-  // of the store we have so far.  While we could in theory handle stores in
-  // this order:  A[0], A[2], A[1]
-  // in practice, right now we only worry about cases where stores are
-  // consequtive in increasing or decreasing address order.
-  uint64_t BytesSoFar = TD.getTypeStoreSize(SI->getOperand(0)->getType());
-  uint64_t BytesFromSI = 0;
-  unsigned StartAlign = SI->getAlignment();
+  // Okay, so we now have a single store that can be splattable.  Scan to find
+  // all subsequent stores of the same value at offsets from the same pointer.
+  // Join these together into ranges, so we can decide whether contiguous
+  // blocks are stored.
+  MemsetRanges Ranges(TD);
+
+  // Add our first store.
+  Ranges.addStore(0, SI);
   Value *StartPtr = SI->getPointerOperand();
-  SmallVector<StoreInst*, 16> Stores;
-  Stores.push_back(SI);
   
   BasicBlock::iterator BI = SI;
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -1164,77 +1274,69 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
     // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
       break;
-
-    Value *ThisPointer = NextStore->getPointerOperand();
-    unsigned AccessSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
-    // If so, check to see if the store is before the current range or after it
-    // in either case, extend the range, otherwise reject it.
-    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) {
-      // Okay, this extends the stored area on the end, just add to the bytes
-      // so far and remember this store.
-      BytesSoFar += AccessSize;
-      Stores.push_back(NextStore);
-      continue;
-    }
-
-    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) {
-      // Okay, the store is before the current range.  Reset our start pointer
-      // and get new alignment info etc.
-      BytesSoFar += AccessSize;
-      BytesFromSI += AccessSize;
-      Stores.push_back(NextStore);
-      StartPtr = ThisPointer;
-      StartAlign = NextStore->getAlignment();
-      continue;
-    }
-
-    // Otherwise, this store wasn't contiguous with our current range, bail out.
-    break;
+
+    // Check to see if this store is to a constant offset from the start ptr.
+    int64_t Offset;
+    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
+      break;
+
+    Ranges.addStore(Offset, NextStore);
   }
-
-  // If we found less than 4 stores to merge, bail out, it isn't worth losing
-  // type information in llvm IR to do the transformation.
-  if (Stores.size() < 4)
-    return false;
+
+  Function *MemSetF = 0;
   
-  // Otherwise, we do want to transform this!  Create a new memset.  We put the
-  // memset right after the first store that we found in this block.  This
-  // ensures that the caller will increment the iterator to the memset before
-  // it deletes all the stores.
-  BasicBlock::iterator InsertPt = SI; ++InsertPt;
+  // Now that we have full information about ranges, loop over the ranges and
+  // emit memsets for anything big enough to be worthwhile.
+  bool MadeChange = false;
+  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+       I != E; ++I) {
+    const MemsetRange &Range = *I;
+
+    // If we found fewer than 4 stores to merge, ignore the subrange: it isn't
+    // worth losing type information in llvm IR to do the transformation.
+    if (Range.TheStores.size() < 4)
+      continue;
+
+    // Otherwise, we do want to transform this!  Create a new memset.  We put
+    // the memset right after the first store that we found in this block.  This
+    // ensures that the caller will increment the iterator to the memset before
+    // it deletes all the stores.
+    BasicBlock::iterator InsertPt = SI; ++InsertPt;
   
-  Function *F = Intrinsic::getDeclaration(SI->getParent()->getParent()
+    if (MemSetF == 0)
+      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
                                         ->getParent(), Intrinsic::memset_i64);
+
+    // StartPtr may not dominate the starting point.  Instead of using it, base
+    // the destination pointer off the input to the first store in the block.
+    StartPtr = SI->getPointerOperand();
   
-  // StartPtr may not dominate the starting point.  Instead of using it, base
-  // the destination pointer off the input to the first store in the block.
-  StartPtr = SI->getPointerOperand();
+    // Cast the start ptr to be i8* as memset requires.
+    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+    if (StartPtr->getType() != i8Ptr)
+      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+                                 InsertPt);
   
-  // Cast the start ptr to be i8* as memset requires.
-  const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
-  if (StartPtr->getType() != i8Ptr)
-    StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
-                               InsertPt);
+    // Offset the pointer if needed.
+    if (Range.Start)
+      StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
+                                                                  Range.Start),
+                                       "ptroffset", InsertPt);
   
-  // Offset the pointer if needed.
-  if (BytesFromSI)
-    StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
-                                                                -BytesFromSI),
-                                     "ptroffset", InsertPt);
+    Value *Ops[] = {
+      StartPtr, ByteVal,                                       // Start, value
+      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),  // size
+      ConstantInt::get(Type::Int32Ty, Range.Alignment)         // align
+    };
+    new CallInst(MemSetF, Ops, Ops+4, "", InsertPt);
   
-  Value *Ops[] = {
-    StartPtr, ByteVal,                              // Start, value
-    ConstantInt::get(Type::Int64Ty, BytesSoFar),    // size
-    ConstantInt::get(Type::Int32Ty, StartAlign)     // align
-  };
-  new CallInst(F, Ops, Ops+4, "", InsertPt);
+    // Zap all the stores.
+    toErase.append(Range.TheStores.begin(), Range.TheStores.end());
+    ++NumMemSetInfer;
+    MadeChange = true;
+  }
   
-  // Zap all the stores.
-  toErase.append(Stores.begin(), Stores.end());
-  
-  ++NumMemSetInfer;
-  return true;
+  return MadeChange;
 }
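
To make the new capability concrete, here is the kind of straight-line code
the change is aimed at.  This is a hypothetical example, not from the patch;
the Pair/zeroTwo names are illustrative, and the byte offsets assume 4-byte
ints, as on common targets.

// Four zero stores that cover a contiguous block of memory but are not
// emitted in increasing address order.
struct Pair { int a, b; };       // sizeof(Pair) == 8 on common targets

void zeroTwo(Pair *P) {
  P[0].a = 0;   // byte offset  0
  P[1].a = 0;   // byte offset  8
  P[0].b = 0;   // byte offset  4
  P[1].b = 0;   // byte offset 12
}

The deleted code only grew the store range at its two ends, so the
non-monotonic offset sequence 0, 8, 4, 12 made it bail out at the second
store; with MemsetRanges the four stores coalesce into the single range
[0, 16), which -form-memset-from-stores can then emit as one memset of 16
zero bytes.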
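The heart of the patch is the interval join in MemsetRanges::addStore.  Below
is a minimal, self-contained C++ sketch of the same joining logic over plain
(offset, size) pairs; Range, RangeList, and dump are illustrative names, not
LLVM API, and the patch's bookkeeping for StartPtr, alignment, and the store
list is omitted.

// Standalone sketch (not LLVM code) of the range-joining idea.
#include <cstdint>
#include <iostream>
#include <list>

struct Range {
  int64_t Start, End;  // Semi-open span [Start, End) of bytes covered.
};

class RangeList {
  std::list<Range> Ranges;  // Kept sorted by Start.
public:
  // Record a store of 'Size' bytes at byte offset 'Start', merging it into
  // any ranges it touches, following the patch's addStore.
  void addStore(int64_t Start, int64_t Size) {
    int64_t End = Start + Size;
    auto I = Ranges.begin(), E = Ranges.end();

    // Skip ranges that end strictly before this store begins.
    while (I != E && Start > I->End)
      ++I;

    // No overlap and not adjacent: insert a fresh range, keeping sort order.
    if (I == E || End < I->Start) {
      Ranges.insert(I, Range{Start, End});
      return;
    }

    // Grow the touched range on the left if the store starts earlier.
    if (Start < I->Start)
      I->Start = Start;

    // Grow it on the right, swallowing any later ranges it now reaches.
    if (End > I->End) {
      I->End = End;
      auto NextI = I;
      while (++NextI != E && End >= NextI->Start) {
        if (NextI->End > I->End)
          I->End = NextI->End;
        Ranges.erase(NextI);
        NextI = I;
      }
    }
  }

  void dump() const {
    for (const Range &R : Ranges)
      std::cout << "[" << R.Start << ", " << R.End << ") ";
    std::cout << "\n";
  }
};

int main() {
  // The out-of-order byte stores from the MemsetRange comment:
  // P+1, P+0, P+3, P+2, each one byte wide.
  RangeList RL;
  RL.addStore(1, 1); RL.dump();  // [1, 2)
  RL.addStore(0, 1); RL.dump();  // [0, 2)
  RL.addStore(3, 1); RL.dump();  // [0, 2) [3, 4)
  RL.addStore(2, 1); RL.dump();  // [0, 4)  -- memset'able
}

Run on those four stores, the sketch prints [1, 2), then [0, 2), then
[0, 2) [3, 4), then [0, 4), matching the walk-through in the MemsetRange
comment.  The sorted std::list plus linear scan mirrors the patch's design
choice: the number of ranges per block is small, so simplicity beats an
asymptotically faster structure.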