mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-17 04:24:00 +00:00
Merge memsets followed by neighboring memsets and other stores into
larger memsets. Among other things, this fixes rdar://8760394 and
allows us to handle "Example 2" from http://blog.regehr.org/archives/320,
compiling it into a single 4096-byte memset:

_mad_synth_mute:                        ## @mad_synth_mute
## BB#0:                                ## %entry
	pushq	%rax
	movl	$4096, %esi             ## imm = 0x1000
	callq	___bzero
	popq	%rax
	ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123089 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -322,6 +322,7 @@ namespace {
|
|||||||
|
|
||||||
// Helper functions
|
// Helper functions
|
||||||
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
|
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
|
||||||
|
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
|
||||||
bool processMemCpy(MemCpyInst *M);
|
bool processMemCpy(MemCpyInst *M);
|
||||||
bool processMemMove(MemMoveInst *M);
|
bool processMemMove(MemMoveInst *M);
|
||||||
bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
|
bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
|
||||||
@ -511,6 +512,17 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
|
||||||
|
// See if there is another memset or store neighboring this memset which
|
||||||
|
// allows us to widen out the memset to do a single larger store.
|
||||||
|
if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
|
||||||
|
MSI->getValue())) {
|
||||||
|
BBI = I; // Don't invalidate iterator.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
|
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
|
||||||
/// and checks for the possibility of a call slot optimization by having
|
/// and checks for the possibility of a call slot optimization by having
|
||||||
@ -775,6 +787,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -884,11 +897,13 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
|
|||||||
|
|
||||||
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
||||||
MadeChange |= processStore(SI, BI);
|
MadeChange |= processStore(SI, BI);
|
||||||
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) {
|
else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
|
||||||
|
RepeatInstruction = processMemSet(M, BI);
|
||||||
|
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
|
||||||
RepeatInstruction = processMemCpy(M);
|
RepeatInstruction = processMemCpy(M);
|
||||||
} else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
|
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
|
||||||
RepeatInstruction = processMemMove(M);
|
RepeatInstruction = processMemMove(M);
|
||||||
} else if (CallSite CS = (Value*)I) {
|
else if (CallSite CS = (Value*)I) {
|
||||||
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
|
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
|
||||||
if (CS.paramHasAttr(i+1, Attribute::ByVal))
|
if (CS.paramHasAttr(i+1, Attribute::ByVal))
|
||||||
MadeChange |= processByValArgument(CS, i);
|
MadeChange |= processByValArgument(CS, i);
|
||||||
|
@ -164,6 +164,7 @@ entry:
|
|||||||
declare void @foo(%struct.MV*, %struct.MV*, i8*)
|
declare void @foo(%struct.MV*, %struct.MV*, i8*)
|
||||||
|
|
||||||
|
|
||||||
|
; Store followed by memset.
|
||||||
define void @test3(i32* nocapture %P) nounwind ssp {
|
define void @test3(i32* nocapture %P) nounwind ssp {
|
||||||
entry:
|
entry:
|
||||||
%arrayidx = getelementptr inbounds i32* %P, i64 1
|
%arrayidx = getelementptr inbounds i32* %P, i64 1
|
||||||
@ -177,6 +178,7 @@ entry:
|
|||||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
|
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; store followed by memset, different offset scenario
|
||||||
define void @test4(i32* nocapture %P) nounwind ssp {
|
define void @test4(i32* nocapture %P) nounwind ssp {
|
||||||
entry:
|
entry:
|
||||||
store i32 0, i32* %P, align 4
|
store i32 0, i32* %P, align 4
|
||||||
@ -191,4 +193,30 @@ entry:
|
|||||||
|
|
||||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
||||||
|
|
||||||
|
; Memset followed by store.
|
||||||
|
define void @test5(i32* nocapture %P) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
%add.ptr = getelementptr inbounds i32* %P, i64 2
|
||||||
|
%0 = bitcast i32* %add.ptr to i8*
|
||||||
|
tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
|
||||||
|
%arrayidx = getelementptr inbounds i32* %P, i64 1
|
||||||
|
store i32 0, i32* %arrayidx, align 4
|
||||||
|
ret void
|
||||||
|
; CHECK: @test5
|
||||||
|
; CHECK-NOT: store
|
||||||
|
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Memset followed by memset.
|
||||||
|
define void @test6(i32* nocapture %P) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32* %P to i8*
|
||||||
|
tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
|
||||||
|
%add.ptr = getelementptr inbounds i32* %P, i64 3
|
||||||
|
%1 = bitcast i32* %add.ptr to i8*
|
||||||
|
tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
|
||||||
|
ret void
|
||||||
|
; CHECK: @test6
|
||||||
|
; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
|
||||||
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user