
Add support for inlining small memcpys.

rdar://10412592


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144578 91177308-0d34-0410-b5e6-96231b3b80d8
Chad Rosier 2011-11-14 22:46:17 +00:00
parent e489af8dce
commit 909cb4f2f2
2 changed files with 94 additions and 5 deletions
lib/Target/ARM
test/CodeGen/ARM
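
Before the diff, a minimal standalone sketch of the chunking strategy this patch implements: a constant-length memcpy of at most 16 bytes is decomposed greedily into 4-, 2-, and 1-byte integer accesses, one load/store pair per chunk. The helper below is illustrative only (it just reports the access widths); the real lowering is done by ARMTryEmitSmallMemXfer in the hunk that follows.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative sketch (not LLVM code): mirrors the width-selection loop in
// ARMTryEmitSmallMemXfer, but only records the chosen access sizes.
static std::vector<unsigned> chunkSizes(uint64_t Len) {
  std::vector<unsigned> Sizes;
  while (Len) {
    unsigned Size = Len >= 4 ? 4 : (Len >= 2 ? 2 : 1);
    Sizes.push_back(Size); // one load/store pair of this width
    Len -= Size;
  }
  return Sizes;
}

int main() {
  // The new t4 test copies 10 bytes: 4 + 4 + 2, i.e. two ldr/str pairs
  // followed by an ldrh/strh pair, as checked at the end of the diff.
  for (unsigned S : chunkSizes(10))
    std::printf("%u-byte access\n", S);
}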

@@ -185,6 +185,9 @@ class ARMFastISel : public FastISel {
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
    bool ARMIsMemXferSmall(uint64_t Len);
    bool ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len,
                                bool isMemCpy);
    unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
@@ -2193,18 +2196,76 @@ bool ARMFastISel::SelectCall(const Instruction *I,
  return true;
}

bool ARMFastISel::ARMIsMemXferSmall(uint64_t Len) {
  return Len <= 16;
}

bool ARMFastISel::ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len,
                                         bool isMemCpy) {
  // FIXME: Memmove's require a little more care because their source and
  // destination may overlap.
  if (!isMemCpy)
    return false;

  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemXferSmall(Len))
    return false;

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else {
      assert(Len == 1);
      VT = MVT::i8;
    }
    bool RV;
    unsigned ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert(RV == true && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert(RV == true && "Should be able to handle this store.");
    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}

bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    // FIXME: Small memcpy/memmove's are common enough that we want to do them
    // without a call if possible.
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy/memmove's are common enough that we want to do them
      // without a call if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemXferSmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        if (ARMTryEmitSmallMemXfer(Dest, Src, Len, isMemCpy))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;
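
As a reading aid for the hunk above, the decision of whether a transfer gets inlined (rather than lowered to a memcpy/memmove call) can be summarized as a plain predicate. This is a sketch with hypothetical names, not LLVM API; the real code additionally requires that ARMComputeAddress succeed for both the destination and the source.

#include <cstdint>

// Hypothetical standalone summary of the inlining gate added by this patch.
struct XferInfo {
  bool IsMemCpy;    // memcpy rather than memmove (memmove sources may overlap)
  bool IsVolatile;  // volatile transfers are never inlined
  bool HasConstLen; // the length operand is a ConstantInt
  uint64_t Len;     // constant length in bytes, when HasConstLen is true
};

// Mirrors the checks above: not volatile, memcpy only, constant length, and
// ARMIsMemXferSmall (Len <= 16). Address computation is omitted here.
static bool shouldInlineXfer(const XferInfo &X) {
  return !X.IsVolatile && X.IsMemCpy && X.HasConstLen && X.Len <= 16;
}

int main() {
  XferInfo Copy10 = {/*IsMemCpy=*/true, /*IsVolatile=*/false,
                     /*HasConstLen=*/true, /*Len=*/10};
  return shouldInlineXfer(Copy10) ? 0 : 1; // a 10-byte memcpy is inlined
}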

@@ -33,7 +33,7 @@ define void @t2() nounwind ssp {
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #10
; ARM: movw r2, #17
; ARM: str r0, [sp] @ 4-byte Spill
; ARM: mov r0, r1
; ARM: ldr r1, [sp] @ 4-byte Reload
@@ -43,11 +43,11 @@ define void @t2() nounwind ssp {
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #10
; THUMB: movs r2, #17
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memcpy
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
ret void
}
@@ -75,4 +75,32 @@ define void @t3() nounwind ssp {
ret void
}

define void @t4() nounwind ssp {
; ARM: t4
; ARM: ldr r0, LCPI3_0
; ARM: ldr r0, [r0]
; ARM: ldr r1, LCPI3_1
; ARM: ldr r1, [r1]
; ARM: ldr r2, [r1, #16]
; ARM: str r2, [r0, #4]
; ARM: ldr r2, [r1, #20]
; ARM: str r2, [r0, #8]
; ARM: ldrh r1, [r1, #24]
; ARM: strh r1, [r0, #12]
; ARM: bx lr
; THUMB: ldr.n r0, LCPI3_0
; THUMB: ldr r0, [r0]
; THUMB: ldr.n r1, LCPI3_1
; THUMB: ldr r1, [r1]
; THUMB: ldr r2, [r1, #16]
; THUMB: str r2, [r0, #4]
; THUMB: ldr r2, [r1, #20]
; THUMB: str r2, [r0, #8]
; THUMB: ldrh r1, [r1, #24]
; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
ret void
}
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind