mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-16 14:31:59 +00:00
Implement rdar://9009151, transforming strided loop stores of
unsplatable values into memset_pattern16 when it is available (recent darwins). This transforms lots of strided loop stores of ints for example, like 5 in vpr: Formed memset: call void @memset_pattern16(i8* %4, i8* getelementptr inbounds ([16 x i8]* @.memset_pattern9, i32 0, i32 0), i64 %tmp25) from store to: {%3,+,4}<%11> at: store i32 3, i32* %scevgep, align 4, !tbaa !4 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126040 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9bb83e4f48
commit
3a393728a6
@ -39,6 +39,7 @@
|
||||
#define DEBUG_TYPE "loop-idiom"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
@ -76,11 +77,11 @@ namespace {
|
||||
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
|
||||
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
|
||||
|
||||
bool processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
|
||||
unsigned StoreAlignment,
|
||||
Value *SplatValue, Instruction *TheStore,
|
||||
const SCEVAddRecExpr *Ev,
|
||||
const SCEV *BECount);
|
||||
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
||||
unsigned StoreAlignment,
|
||||
Value *SplatValue, Instruction *TheStore,
|
||||
const SCEVAddRecExpr *Ev,
|
||||
const SCEV *BECount);
|
||||
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
||||
const SCEVAddRecExpr *StoreEv,
|
||||
const SCEVAddRecExpr *LoadEv,
|
||||
@ -275,14 +276,11 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
|
||||
// validity check in mayLoopAccessLocation to be updated though.
|
||||
if (Stride == 0 || StoreSize != Stride->getValue()->getValue())
|
||||
return false;
|
||||
|
||||
// If the stored value is a byte-wise value (like i32 -1), then it may be
|
||||
// turned into a memset of i8 -1, assuming that all the consecutive bytes
|
||||
// are stored. A store of i32 0x01020304 can never be turned into a memset.
|
||||
if (Value *SplatValue = isBytewiseValue(StoredVal))
|
||||
if (processLoopStoreOfSplatValue(StorePtr, StoreSize, SI->getAlignment(),
|
||||
SplatValue, SI, StoreEv, BECount))
|
||||
return true;
|
||||
|
||||
// See if we can optimize just this store in isolation.
|
||||
if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
|
||||
StoredVal, SI, StoreEv, BECount))
|
||||
return true;
|
||||
|
||||
// If the stored value is a strided load in the same loop with the same stride
|
||||
// this this may be transformable into a memcpy. This kicks in for stuff like
|
||||
@ -333,9 +331,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
|
||||
if (Stride == 0 || MSI->getLength() != Stride->getValue())
|
||||
return false;
|
||||
|
||||
return processLoopStoreOfSplatValue(Pointer, (unsigned)SizeInBytes,
|
||||
MSI->getAlignment(), MSI->getValue(),
|
||||
MSI, Ev, BECount);
|
||||
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
|
||||
MSI->getAlignment(), MSI->getValue(),
|
||||
MSI, Ev, BECount);
|
||||
}
|
||||
|
||||
|
||||
@ -372,22 +370,97 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// processLoopStoreOfSplatValue - We see a strided store of a memsetable value.
|
||||
/// If we can transform this into a memset in the loop preheader, do so.
|
||||
bool LoopIdiomRecognize::
|
||||
processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
|
||||
unsigned StoreAlignment, Value *SplatValue,
|
||||
Instruction *TheStore,
|
||||
const SCEVAddRecExpr *Ev, const SCEV *BECount) {
|
||||
// If we're not allowed to form memset, we fail.
|
||||
if (!TLI->has(LibFunc::memset))
|
||||
return false;
|
||||
|
||||
/// getMemSetPatternValue - If a strided store of the specified value is safe to
|
||||
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
|
||||
/// be passed in. Otherwise, return null.
|
||||
///
|
||||
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
|
||||
/// just replicate their input array and then pass on to memset_pattern16.
|
||||
static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
|
||||
// If the value isn't a constant, we can't promote it to being in a constant
|
||||
// array. We could theoretically do a store to an alloca or something, but
|
||||
// that doesn't seem worthwhile.
|
||||
Constant *C = dyn_cast<Constant>(V);
|
||||
if (C == 0) return 0;
|
||||
|
||||
// Verify that the stored value is loop invariant. If not, we can't promote
|
||||
// the memset.
|
||||
if (!CurLoop->isLoopInvariant(SplatValue))
|
||||
// Only handle simple values that are a power of two bytes in size.
|
||||
uint64_t Size = TD.getTypeSizeInBits(V->getType());
|
||||
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
|
||||
return 0;
|
||||
|
||||
// Convert the constant to an integer type of the appropriate size so we can
|
||||
// start hacking on it.
|
||||
if (isa<PointerType>(V->getType()))
|
||||
C = ConstantExpr::getPtrToInt(C, IntegerType::get(C->getContext(), Size));
|
||||
else if (isa<VectorType>(V->getType()) || V->getType()->isFloatingPointTy())
|
||||
C = ConstantExpr::getBitCast(C, IntegerType::get(C->getContext(), Size));
|
||||
else if (!isa<IntegerType>(V->getType()))
|
||||
return 0; // Unhandled type.
|
||||
|
||||
// Convert to size in bytes.
|
||||
Size /= 8;
|
||||
|
||||
// If we couldn't fold this to an integer, we fail. We don't bother to handle
|
||||
// relocatable expressions like the address of a global yet.
|
||||
// FIXME!
|
||||
ConstantInt *CI = dyn_cast<ConstantInt>(C);
|
||||
if (CI == 0) return 0;
|
||||
|
||||
APInt CVal = CI->getValue();
|
||||
|
||||
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
|
||||
// if the top and bottom are the same.
|
||||
if (Size > 16) return 0;
|
||||
|
||||
// If this is a big endian target (PPC) then we need to bswap.
|
||||
if (TD.isBigEndian())
|
||||
CVal = CVal.byteSwap();
|
||||
|
||||
// Determine what each byte of the pattern value should be.
|
||||
char Value[16];
|
||||
for (unsigned i = 0; i != 16; ++i) {
|
||||
// Get the byte value we're indexing into.
|
||||
unsigned CByte = i % Size;
|
||||
Value[i] = (unsigned char)(CVal.getZExtValue() >> CByte);
|
||||
}
|
||||
|
||||
return ConstantArray::get(V->getContext(), StringRef(Value, 16), false);
|
||||
}
|
||||
|
||||
|
||||
/// processLoopStridedStore - We see a strided store of some value. If we can
|
||||
/// transform this into a memset or memset_pattern in the loop preheader, do so.
|
||||
bool LoopIdiomRecognize::
|
||||
processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
||||
unsigned StoreAlignment, Value *StoredVal,
|
||||
Instruction *TheStore, const SCEVAddRecExpr *Ev,
|
||||
const SCEV *BECount) {
|
||||
|
||||
// If the stored value is a byte-wise value (like i32 -1), then it may be
|
||||
// turned into a memset of i8 -1, assuming that all the consecutive bytes
|
||||
// are stored. A store of i32 0x01020304 can never be turned into a memset,
|
||||
// but it can be turned into memset_pattern if the target supports it.
|
||||
Value *SplatValue = isBytewiseValue(StoredVal);
|
||||
Constant *PatternValue = 0;
|
||||
|
||||
// If we're allowed to form a memset, and the stored value would be acceptable
|
||||
// for memset, use it.
|
||||
if (SplatValue && TLI->has(LibFunc::memset) &&
|
||||
// Verify that the stored value is loop invariant. If not, we can't
|
||||
// promote the memset.
|
||||
CurLoop->isLoopInvariant(SplatValue)) {
|
||||
// Keep and use SplatValue.
|
||||
PatternValue = 0;
|
||||
} else if (TLI->has(LibFunc::memset_pattern16) &&
|
||||
(PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
|
||||
// It looks like we can use PatternValue!
|
||||
SplatValue = 0;
|
||||
} else {
|
||||
// Otherwise, this isn't an idiom we can transform. For example, we can't
|
||||
// do anything with a 3-byte store, for example.
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
|
||||
// this into a memset in the loop preheader now if we want. However, this
|
||||
@ -415,7 +488,7 @@ processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
|
||||
|
||||
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
|
||||
// pointer size if it isn't already.
|
||||
const Type *IntPtr = TD->getIntPtrType(SplatValue->getContext());
|
||||
const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
|
||||
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
|
||||
|
||||
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
|
||||
@ -427,8 +500,28 @@ processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
|
||||
Value *NumBytes =
|
||||
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
|
||||
|
||||
Value *NewCall =
|
||||
Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
|
||||
Value *NewCall;
|
||||
if (SplatValue)
|
||||
NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
|
||||
else {
|
||||
Module *M = TheStore->getParent()->getParent()->getParent();
|
||||
Value *MSP = M->getOrInsertFunction("memset_pattern16",
|
||||
Builder.getVoidTy(),
|
||||
Builder.getInt8PtrTy(),
|
||||
Builder.getInt8PtrTy(), IntPtr,
|
||||
(void*)0);
|
||||
|
||||
// Otherwise we should form a memset_pattern16. PatternValue is known to be
|
||||
// an constant array of 16-bytes. Plop the value into a mergable global.
|
||||
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
|
||||
GlobalValue::InternalLinkage,
|
||||
PatternValue, ".memset_pattern");
|
||||
GV->setUnnamedAddr(true); // Ok to merge these.
|
||||
GV->setAlignment(16);
|
||||
Value *PatternPtr = Builder.CreateConstInBoundsGEP2_32(GV, 0, 0, "pattern");
|
||||
|
||||
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
|
||||
<< " from store to: " << *Ev << " at: " << *TheStore << "\n");
|
||||
|
@ -273,3 +273,30 @@ for.end13: ; preds = %for.inc10
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
}
|
||||
|
||||
; On darwin10 (which is the triple in this .ll file) this loop can be turned
|
||||
; into a memset_pattern call.
|
||||
; rdar://9009151
|
||||
define void @test11(i32* nocapture %P) nounwind ssp {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
||||
%arrayidx = getelementptr i32* %P, i64 %indvar
|
||||
store i32 1, i32* %arrayidx, align 4
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, 10000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
; CHECK: @test11
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: bitcast
|
||||
; CHECK-NEXT: memset_pattern
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user