mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-13 04:38:24 +00:00
implement an initial hack at a straight-line store -> memset optimization.
This fires dozens of times across spec and multisource, but I don't know if it actually speeds stuff up. Hopefully the testers will show something nice :) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48680 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -33,6 +33,7 @@
|
|||||||
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
||||||
#include "llvm/Support/CFG.h"
|
#include "llvm/Support/CFG.h"
|
||||||
#include "llvm/Support/Compiler.h"
|
#include "llvm/Support/Compiler.h"
|
||||||
|
#include "llvm/Support/GetElementPtrTypeIterator.h"
|
||||||
#include "llvm/Target/TargetData.h"
|
#include "llvm/Target/TargetData.h"
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
@ -1036,11 +1037,63 @@ static Value *isBytewiseValue(Value *V) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
|
||||||
|
bool &VariableIdxFound, TargetData &TD) {
|
||||||
|
// Skip over the first indices.
|
||||||
|
gep_type_iterator GTI = gep_type_begin(GEP);
|
||||||
|
for (unsigned i = 1; i != Idx; ++i, ++GTI)
|
||||||
|
/*skip along*/;
|
||||||
|
|
||||||
|
// Compute the offset implied by the rest of the indices.
|
||||||
|
int64_t Offset = 0;
|
||||||
|
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
|
||||||
|
ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
|
||||||
|
if (OpC == 0)
|
||||||
|
return VariableIdxFound = true;
|
||||||
|
if (OpC->isZero()) continue; // No offset.
|
||||||
|
|
||||||
|
// Handle struct indices, which add their field offset to the pointer.
|
||||||
|
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
|
||||||
|
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, we have a sequential type like an array or vector. Multiply
|
||||||
|
// the index by the ElementSize.
|
||||||
|
uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
|
||||||
|
Offset += Size*OpC->getSExtValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
return Offset;
|
||||||
|
}
|
||||||
|
|
||||||
/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
|
/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
|
||||||
/// plus the specified constant offset. For example, Ptr1 might be &A[42], and
|
/// plus the specified constant offset. For example, Ptr1 might be &A[42], and
|
||||||
/// Ptr2 might be &A[40] and Offset might be 8.
|
/// Ptr2 might be &A[40] and Offset might be 8.
|
||||||
static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset) {
|
static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
|
||||||
return true;
|
TargetData &TD) {
|
||||||
|
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
|
||||||
|
// base. After that base, they may have some number of common (and
|
||||||
|
// potentially variable) indices. After that they handle some constant
|
||||||
|
// offset, which determines their offset from each other. At this point, we
|
||||||
|
// handle no other case.
|
||||||
|
GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
|
||||||
|
GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
|
||||||
|
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Skip any common indices and track the GEP types.
|
||||||
|
unsigned Idx = 1;
|
||||||
|
for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
|
||||||
|
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
|
||||||
|
break;
|
||||||
|
|
||||||
|
bool VariableIdxFound = false;
|
||||||
|
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
|
||||||
|
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
|
||||||
|
if (VariableIdxFound) return false;
|
||||||
|
|
||||||
|
return Offset1 == Offset2+(int64_t)Offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1049,7 +1102,6 @@ static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset) {
|
|||||||
/// neighboring locations of memory. If it sees enough consequtive ones
|
/// neighboring locations of memory. If it sees enough consequtive ones
|
||||||
/// (currently 4) it attempts to merge them together into a memcpy/memset.
|
/// (currently 4) it attempts to merge them together into a memcpy/memset.
|
||||||
bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
|
bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
|
||||||
return false;
|
|
||||||
if (SI->isVolatile()) return false;
|
if (SI->isVolatile()) return false;
|
||||||
|
|
||||||
// There are two cases that are interesting for this code to handle: memcpy
|
// There are two cases that are interesting for this code to handle: memcpy
|
||||||
@ -1078,7 +1130,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
|
|||||||
SmallVector<StoreInst*, 16> Stores;
|
SmallVector<StoreInst*, 16> Stores;
|
||||||
Stores.push_back(SI);
|
Stores.push_back(SI);
|
||||||
|
|
||||||
BasicBlock::iterator BI = SI; ++BI;
|
BasicBlock::iterator BI = SI;
|
||||||
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
|
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
|
||||||
if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
|
if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
|
||||||
// If the call is readnone, ignore it, otherwise bail out. We don't even
|
// If the call is readnone, ignore it, otherwise bail out. We don't even
|
||||||
@ -1110,7 +1162,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
|
|||||||
|
|
||||||
// If so, check to see if the store is before the current range or after it
|
// If so, check to see if the store is before the current range or after it
|
||||||
// in either case, extend the range, otherwise reject it.
|
// in either case, extend the range, otherwise reject it.
|
||||||
if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar)) {
|
if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) {
|
||||||
// Okay, this extends the stored area on the end, just add to the bytes
|
// Okay, this extends the stored area on the end, just add to the bytes
|
||||||
// so far and remember this store.
|
// so far and remember this store.
|
||||||
BytesSoFar += AccessSize;
|
BytesSoFar += AccessSize;
|
||||||
@ -1118,7 +1170,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize)) {
|
if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) {
|
||||||
// Okay, the store is before the current range. Reset our start pointer
|
// Okay, the store is before the current range. Reset our start pointer
|
||||||
// and get new alignment info etc.
|
// and get new alignment info etc.
|
||||||
BytesSoFar += AccessSize;
|
BytesSoFar += AccessSize;
|
||||||
@ -1170,6 +1222,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
|
|||||||
};
|
};
|
||||||
new CallInst(F, Ops, Ops+4, "", InsertPt);
|
new CallInst(F, Ops, Ops+4, "", InsertPt);
|
||||||
|
|
||||||
|
// Zap all the stores.
|
||||||
toErase.append(Stores.begin(), Stores.end());
|
toErase.append(Stores.begin(), Stores.end());
|
||||||
|
|
||||||
++NumMemSetInfer;
|
++NumMemSetInfer;
|
||||||
|
55
test/Transforms/GVN/form-memset.ll
Normal file
55
test/Transforms/GVN/form-memset.ll
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep store
|
||||||
|
; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep llvm.memset
|
||||||
|
|
||||||
|
; All the stores in this example should be merged into a single memset.
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||||
|
target triple = "i386-apple-darwin8"
|
||||||
|
|
||||||
|
define void @foo(i8 signext %c) nounwind {
|
||||||
|
entry:
|
||||||
|
%x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20]
|
||||||
|
%tmp = getelementptr [19 x i8]* %x, i32 0, i32 0 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp, align 1
|
||||||
|
%tmp5 = getelementptr [19 x i8]* %x, i32 0, i32 1 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp5, align 1
|
||||||
|
%tmp9 = getelementptr [19 x i8]* %x, i32 0, i32 2 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp9, align 1
|
||||||
|
%tmp13 = getelementptr [19 x i8]* %x, i32 0, i32 3 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp13, align 1
|
||||||
|
%tmp17 = getelementptr [19 x i8]* %x, i32 0, i32 4 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp17, align 1
|
||||||
|
%tmp21 = getelementptr [19 x i8]* %x, i32 0, i32 5 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp21, align 1
|
||||||
|
%tmp25 = getelementptr [19 x i8]* %x, i32 0, i32 6 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp25, align 1
|
||||||
|
%tmp29 = getelementptr [19 x i8]* %x, i32 0, i32 7 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp29, align 1
|
||||||
|
%tmp33 = getelementptr [19 x i8]* %x, i32 0, i32 8 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp33, align 1
|
||||||
|
%tmp37 = getelementptr [19 x i8]* %x, i32 0, i32 9 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp37, align 1
|
||||||
|
%tmp41 = getelementptr [19 x i8]* %x, i32 0, i32 10 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp41, align 1
|
||||||
|
%tmp45 = getelementptr [19 x i8]* %x, i32 0, i32 11 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp45, align 1
|
||||||
|
%tmp49 = getelementptr [19 x i8]* %x, i32 0, i32 12 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp49, align 1
|
||||||
|
%tmp53 = getelementptr [19 x i8]* %x, i32 0, i32 13 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp53, align 1
|
||||||
|
%tmp57 = getelementptr [19 x i8]* %x, i32 0, i32 14 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp57, align 1
|
||||||
|
%tmp61 = getelementptr [19 x i8]* %x, i32 0, i32 15 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp61, align 1
|
||||||
|
%tmp65 = getelementptr [19 x i8]* %x, i32 0, i32 16 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp65, align 1
|
||||||
|
%tmp69 = getelementptr [19 x i8]* %x, i32 0, i32 17 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp69, align 1
|
||||||
|
%tmp73 = getelementptr [19 x i8]* %x, i32 0, i32 18 ; <i8*> [#uses=1]
|
||||||
|
store i8 %c, i8* %tmp73, align 1
|
||||||
|
%tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind ; <i32> [#uses=0]
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @bar(...)
|
||||||
|
|
Reference in New Issue
Block a user