mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-04 10:30:01 +00:00
Implement PR8644: forwarding a memcpy value to a byval,
allowing the memcpy to be eliminated. Unfortunately, the requirements on byval's without explicit alignment are really weak and impossible to predict in the mid-level optimizer, so this doesn't kick in much with current frontends. The fix is to change clang to set alignment on all byval arguments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119916 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a6fd81dd7f
commit
2f5f90ad3e
@ -52,11 +52,6 @@ public:
|
||||
CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); }
|
||||
CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); }
|
||||
CallSiteBase(ValTy *II) { *this = get(II); }
|
||||
CallSiteBase(InstrTy *II) {
|
||||
assert(II && "Null instruction given?");
|
||||
*this = get(II);
|
||||
assert(I.getPointer() && "Not a call?");
|
||||
}
|
||||
protected:
|
||||
/// CallSiteBase::get - This static method is sort of like a constructor. It
|
||||
/// will create an appropriate call site for a Call or Invoke instruction, but
|
||||
|
@ -301,11 +301,13 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
|
||||
|
||||
namespace {
|
||||
class MemCpyOpt : public FunctionPass {
|
||||
MemoryDependenceAnalysis *MD;
|
||||
bool runOnFunction(Function &F);
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
MemCpyOpt() : FunctionPass(ID) {
|
||||
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
|
||||
MD = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -327,6 +329,7 @@ namespace {
|
||||
uint64_t cpyLen, CallInst *C);
|
||||
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
uint64_t MSize);
|
||||
bool processByValArgument(CallSite CS, unsigned ArgNo);
|
||||
bool iterateOnFunction(Function &F);
|
||||
};
|
||||
|
||||
@ -359,9 +362,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
// a memcpy.
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
|
||||
if (!LI->isVolatile() && LI->hasOneUse()) {
|
||||
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
|
||||
|
||||
MemDepResult dep = MD.getDependency(LI);
|
||||
MemDepResult dep = MD->getDependency(LI);
|
||||
CallInst *C = 0;
|
||||
if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
|
||||
C = dyn_cast<CallInst>(dep.getInst());
|
||||
@ -372,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
LI->getPointerOperand()->stripPointerCasts(),
|
||||
TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
|
||||
if (changed) {
|
||||
MD.removeInstruction(SI);
|
||||
MD->removeInstruction(SI);
|
||||
SI->eraseFromParent();
|
||||
LI->eraseFromParent();
|
||||
++NumMemCpyInstr;
|
||||
@ -505,8 +506,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
|
||||
DEBUG(dbgs() << "Replace stores:\n";
|
||||
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
|
||||
dbgs() << *Range.TheStores[i];
|
||||
dbgs() << "With: " << *C); C=C;
|
||||
dbgs() << *Range.TheStores[i] << '\n';
|
||||
dbgs() << "With: " << *C << '\n'); C=C;
|
||||
|
||||
// Don't invalidate the iterator
|
||||
BBI = BI;
|
||||
@ -657,11 +658,10 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
|
||||
|
||||
// Drop any cached information about the call, because we may have changed
|
||||
// its dependence information by changing its parameter.
|
||||
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
|
||||
MD.removeInstruction(C);
|
||||
MD->removeInstruction(C);
|
||||
|
||||
// Remove the memcpy
|
||||
MD.removeInstruction(cpy);
|
||||
// Remove the memcpy.
|
||||
MD->removeInstruction(cpy);
|
||||
++NumMemCpyInstr;
|
||||
|
||||
return true;
|
||||
@ -675,7 +675,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
uint64_t MSize) {
|
||||
// We can only transforms memcpy's where the dest of one is the source of the
|
||||
// other.
|
||||
if (M->getSource() != MDep->getDest())
|
||||
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
|
||||
return false;
|
||||
|
||||
// Second, the length of the memcpy's must be the same, or the preceeding one
|
||||
@ -684,27 +684,26 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
if (!C1) return false;
|
||||
|
||||
uint64_t DepSize = C1->getValue().getZExtValue();
|
||||
|
||||
if (DepSize < MSize)
|
||||
return false;
|
||||
|
||||
Intrinsic::ID ResultFn = Intrinsic::memcpy;
|
||||
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
|
||||
|
||||
// If the dest of the second might alias the source of the first, then the
|
||||
// source and dest might overlap. We still want to eliminate the intermediate
|
||||
// value, but we have to generate a memmove instead of memcpy.
|
||||
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
|
||||
Intrinsic::ID ResultFn = Intrinsic::memcpy;
|
||||
if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize))
|
||||
ResultFn = Intrinsic::memmove;
|
||||
|
||||
// If all checks passed, then we can transform these memcpy's
|
||||
// If all checks passed, then we can transform M.
|
||||
const Type *ArgTys[3] = {
|
||||
M->getRawDest()->getType(),
|
||||
MDep->getRawSource()->getType(),
|
||||
M->getLength()->getType()
|
||||
};
|
||||
Function *MemCpyFun =
|
||||
Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(),
|
||||
Intrinsic::getDeclaration(MDep->getParent()->getParent()->getParent(),
|
||||
ResultFn, ArgTys, 3);
|
||||
|
||||
// Make sure to use the lesser of the alignment of the source and the dest
|
||||
@ -717,29 +716,30 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
M->getRawDest(),
|
||||
MDep->getRawSource(),
|
||||
M->getLength(),
|
||||
ConstantInt::get(Type::getInt32Ty(M->getContext()), Align),
|
||||
ConstantInt::get(Type::getInt32Ty(MemCpyFun->getContext()), Align),
|
||||
M->getVolatileCst()
|
||||
};
|
||||
CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
|
||||
|
||||
|
||||
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
|
||||
|
||||
// Verify that the copied-from memory doesn't change in between the two
|
||||
// transfers. For example, in:
|
||||
// memcpy(a <- b)
|
||||
// *b = 42;
|
||||
// memcpy(c <- a)
|
||||
// It would be invalid to transform the second memcpy into memcpy(c <- b).
|
||||
MemDepResult NewDep = MD.getDependency(C);
|
||||
//
|
||||
// TODO: If the code between M and MDep is transparent to the destination "c",
|
||||
// then we could still perform the xform by moving M up to the first memcpy.
|
||||
MemDepResult NewDep = MD->getDependency(C);
|
||||
if (!NewDep.isClobber() || NewDep.getInst() != MDep) {
|
||||
MD.removeInstruction(C);
|
||||
MD->removeInstruction(C);
|
||||
C->eraseFromParent();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Otherwise we're good! Nuke the instruction we're replacing.
|
||||
MD.removeInstruction(M);
|
||||
MD->removeInstruction(M);
|
||||
M->eraseFromParent();
|
||||
++NumMemCpyInstr;
|
||||
return true;
|
||||
@ -752,25 +752,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
/// circumstances). This allows later passes to remove the first memcpy
|
||||
/// altogether.
|
||||
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
|
||||
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
|
||||
|
||||
// We can only optimize statically-sized memcpy's.
|
||||
ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
|
||||
if (!cpyLen) return false;
|
||||
// We can only optimize statically-sized memcpy's that are non-volatile.
|
||||
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
|
||||
if (CopySize == 0 || M->isVolatile()) return false;
|
||||
|
||||
// The are two possible optimizations we can do for memcpy:
|
||||
// a) memcpy-memcpy xform which exposes redundance for DSE.
|
||||
// b) call-memcpy xform for return slot optimization.
|
||||
MemDepResult dep = MD.getDependency(M);
|
||||
if (!dep.isClobber())
|
||||
MemDepResult DepInfo = MD->getDependency(M);
|
||||
if (!DepInfo.isClobber())
|
||||
return false;
|
||||
|
||||
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(dep.getInst()))
|
||||
return processMemCpyMemCpyDependence(M, MDep, cpyLen->getZExtValue());
|
||||
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
|
||||
return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
|
||||
|
||||
if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
|
||||
if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
|
||||
bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
|
||||
cpyLen->getZExtValue(), C);
|
||||
CopySize->getZExtValue(), C);
|
||||
if (changed) M->eraseFromParent();
|
||||
return changed;
|
||||
}
|
||||
@ -805,33 +803,116 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
|
||||
|
||||
// MemDep may have over conservative information about this instruction, just
|
||||
// conservatively flush it from the cache.
|
||||
getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
|
||||
MD->removeInstruction(M);
|
||||
|
||||
++NumMoveToCpy;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// processByValArgument - This is called on every byval argument in call sites.
|
||||
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
|
||||
TargetData *TD = getAnalysisIfAvailable<TargetData>();
|
||||
if (!TD) return false;
|
||||
|
||||
// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
|
||||
Value *ByValArg = CS.getArgument(ArgNo);
|
||||
|
||||
// MemDep doesn't have a way to do a local query with a memory location.
|
||||
// Instead, just insert a load and ask for its dependences.
|
||||
LoadInst *TmpLoad = new LoadInst(ByValArg, "", CS.getInstruction());
|
||||
MemDepResult DepInfo = MD->getDependency(TmpLoad);
|
||||
|
||||
MD->removeInstruction(TmpLoad);
|
||||
TmpLoad->eraseFromParent();
|
||||
|
||||
if (!DepInfo.isClobber())
|
||||
return false;
|
||||
|
||||
// If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
|
||||
// a memcpy, see if we can byval from the source of the memcpy instead of the
|
||||
// result.
|
||||
MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
|
||||
if (MDep == 0 || MDep->isVolatile() ||
|
||||
ByValArg->stripPointerCasts() != MDep->getDest())
|
||||
return false;
|
||||
|
||||
// The length of the memcpy must be larger or equal to the size of the byval.
|
||||
// must be larger than the following one.
|
||||
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
|
||||
if (C1 == 0 ||
|
||||
C1->getValue().getZExtValue() < TD->getTypeAllocSize(ByValArg->getType()))
|
||||
return false;
|
||||
|
||||
// Get the alignment of the byval. If it is greater than the memcpy, then we
|
||||
// can't do the substitution. If the call doesn't specify the alignment, then
|
||||
// it is some target specific value that we can't know.
|
||||
unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
|
||||
if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
|
||||
return false;
|
||||
|
||||
// Verify that the copied-from memory doesn't change in between the memcpy and
|
||||
// the byval call.
|
||||
// memcpy(a <- b)
|
||||
// *b = 42;
|
||||
// foo(*a)
|
||||
// It would be invalid to transform the second memcpy into foo(*b).
|
||||
Value *TmpCast = MDep->getSource();
|
||||
if (MDep->getSource()->getType() != ByValArg->getType())
|
||||
TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
|
||||
"tmpcast", CS.getInstruction());
|
||||
Value *TmpVal =
|
||||
UndefValue::get(cast<PointerType>(TmpCast->getType())->getElementType());
|
||||
Instruction *TmpStore = new StoreInst(TmpVal, TmpCast, false,
|
||||
CS.getInstruction());
|
||||
DepInfo = MD->getDependency(TmpStore);
|
||||
bool isUnsafe = !DepInfo.isClobber() || DepInfo.getInst() != MDep;
|
||||
MD->removeInstruction(TmpStore);
|
||||
TmpStore->eraseFromParent();
|
||||
|
||||
if (isUnsafe) {
|
||||
// Clean up the inserted cast instruction.
|
||||
if (TmpCast != MDep->getSource())
|
||||
cast<Instruction>(TmpCast)->eraseFromParent();
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
|
||||
<< " " << *MDep << "\n"
|
||||
<< " " << *CS.getInstruction() << "\n");
|
||||
|
||||
// Otherwise we're good! Update the byval argument.
|
||||
CS.setArgument(ArgNo, TmpCast);
|
||||
++NumMemCpyInstr;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// iterateOnFunction - Executes one iteration of MemCpyOpt.
|
||||
bool MemCpyOpt::iterateOnFunction(Function &F) {
|
||||
bool MadeChange = false;
|
||||
|
||||
// Walk all instruction in the function.
|
||||
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
|
||||
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
|
||||
BI != BE;) {
|
||||
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
|
||||
// Avoid invalidating the iterator.
|
||||
Instruction *I = BI++;
|
||||
|
||||
bool RepeatInstruction = false;
|
||||
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
||||
MadeChange |= processStore(SI, BI);
|
||||
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
|
||||
MadeChange |= processMemCpy(M);
|
||||
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
|
||||
if (processMemMove(M)) {
|
||||
--BI; // Reprocess the new memcpy.
|
||||
MadeChange = true;
|
||||
}
|
||||
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) {
|
||||
RepeatInstruction = processMemCpy(M);
|
||||
} else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
|
||||
RepeatInstruction = processMemMove(M);
|
||||
} else if (CallSite CS = (Value*)I) {
|
||||
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
|
||||
if (CS.paramHasAttr(i+1, Attribute::ByVal))
|
||||
MadeChange |= processByValArgument(CS, i);
|
||||
}
|
||||
|
||||
// Reprocess the instruction if desired.
|
||||
if (RepeatInstruction) {
|
||||
--BI;
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -844,12 +925,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
|
||||
//
|
||||
bool MemCpyOpt::runOnFunction(Function &F) {
|
||||
bool MadeChange = false;
|
||||
MD = &getAnalysis<MemoryDependenceAnalysis>();
|
||||
while (1) {
|
||||
if (!iterateOnFunction(F))
|
||||
break;
|
||||
MadeChange = true;
|
||||
}
|
||||
|
||||
MD = 0;
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
|
@ -62,3 +62,18 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
|
||||
; CHECK-NEXT: call void @llvm.memcpy
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
||||
; PR8644
|
||||
define void @test4(i8 *%P) {
|
||||
%A = alloca {i32, i32}
|
||||
%a = bitcast {i32, i32}* %A to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
|
||||
call void @test4a(i8* byval align 1 %a)
|
||||
ret void
|
||||
; CHECK: @test4
|
||||
; CHECK-NEXT: call void @test4a(
|
||||
}
|
||||
|
||||
declare void @test4a(i8* byval align 1)
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
||||
|
@ -3,7 +3,7 @@
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "i686-apple-darwin9"
|
||||
|
||||
define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval %z) nounwind {
|
||||
define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 8 %z) nounwind {
|
||||
entry:
|
||||
%iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
|
||||
%memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
|
||||
@ -16,7 +16,7 @@ entry:
|
||||
%tmp8 = load x86_fp80* %tmp7, align 16 ; <x86_fp80> [#uses=1]
|
||||
store x86_fp80 %tmp3, x86_fp80* %real, align 16
|
||||
store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
|
||||
call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %memtmp, { x86_fp80, x86_fp80 }* byval %iz ) nounwind
|
||||
call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind
|
||||
%memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
|
||||
%agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
|
||||
|
Loading…
x
Reference in New Issue
Block a user