Add support for inlining small memcpys.

rdar://10412592

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144578 91177308-0d34-0410-b5e6-96231b3b80d8

Parent: e489af8dce
Commit: 909cb4f2f2
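For orientation, here is a minimal standalone sketch (not part of the patch; names are illustrative) of the policy the change implements: a memcpy whose length is a compile-time constant of at most 16 bytes is expanded inline into 4-, 2-, and 1-byte loads and stores, chosen greedily, while anything larger or non-constant keeps the existing call lowering.

// Sketch only: mirrors ARMIsMemXferSmall and the greedy 4/2/1 chunking in
// ARMTryEmitSmallMemXfer, outside of LLVM, so the policy can be read on its own.
#include <cstdint>
#include <iostream>

static bool isMemXferSmall(uint64_t Len) { return Len <= 16; }

int main() {
  for (uint64_t Len : {10, 16, 17}) {
    if (!isMemXferSmall(Len)) {
      std::cout << Len << " bytes: left as a call to memcpy\n";
      continue;
    }
    std::cout << Len << " bytes: inlined as accesses of";
    for (uint64_t Rem = Len; Rem;) {
      unsigned Size = Rem >= 4 ? 4 : (Rem >= 2 ? 2 : 1);
      std::cout << ' ' << Size;
      Rem -= Size;
    }
    std::cout << " byte(s)\n";
  }
  return 0;
}

With the patch, a 10-byte constant copy becomes two word accesses plus a halfword access, which is what the new @t4 FileCheck lines below expect, while the 17-byte copy in @t2 still ends in bl _memcpy because it exceeds the 16-byte threshold.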
lib/Target/ARM/ARMFastISel.cpp

@@ -185,6 +185,9 @@ class ARMFastISel : public FastISel {
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
    bool ARMIsMemXferSmall(uint64_t Len);
    bool ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len,
                                bool isMemCpy);
    unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
@@ -2193,18 +2196,76 @@ bool ARMFastISel::SelectCall(const Instruction *I,
  return true;
}

bool ARMFastISel::ARMIsMemXferSmall(uint64_t Len) {
  return Len <= 16;
}

bool ARMFastISel::ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len,
                                         bool isMemCpy) {
  // FIXME: Memmoves require a little more care because their source and
  // destination may overlap.
  if (!isMemCpy)
    return false;

  // Make sure we don't bloat code by inlining very large memcpys.
  if (!ARMIsMemXferSmall(Len))
    return false;

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else {
      assert(Len == 1);
      VT = MVT::i8;
    }

    bool RV;
    unsigned ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert(RV == true && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert(RV == true && "Should be able to handle this store.");

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}

bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    // FIXME: Small memcpys/memmoves are common enough that we want to do them
    // without a call if possible.
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpys/memmoves are common enough that we want to do them
      // without a call if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemXferSmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        if (ARMTryEmitSmallMemXfer(Dest, Src, Len, isMemCpy))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;
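The selection logic above applies its guards in a fixed order before either giving up or falling back to the call lowering. The following is a compact model of that control flow with stand-in types; XferRequest, Lowering, and selectMemTransfer are invented for illustration, while the real code works on MemTransferInst and Address.

#include <cstdint>

// Stand-in for the information SelectIntrinsicCall inspects on a mem transfer.
struct XferRequest {
  bool IsVolatile;          // MTI.isVolatile()
  bool IsMemCpy;            // false for llvm.memmove
  bool HasConstLen;         // length operand is a ConstantInt
  uint64_t Len;             // only meaningful when HasConstLen is true
  bool AddressesAreSimple;  // stands in for ARMComputeAddress succeeding
};

// Abort   -> fast-isel returns false and the normal ISel handles the call.
// Inlined -> chunked loads/stores were emitted (the new fast path).
// LibCall -> fall through to the existing memcpy/memmove call lowering.
enum class Lowering { Abort, Inlined, LibCall };

Lowering selectMemTransfer(const XferRequest &R) {
  if (R.IsVolatile)
    return Lowering::Abort;            // volatile transfers are not handled
  if (R.IsMemCpy && R.HasConstLen && R.Len <= 16) {
    if (!R.AddressesAreSimple)
      return Lowering::Abort;          // couldn't form simple base+offset addresses
    return Lowering::Inlined;          // emit the small inline copy
  }
  return Lowering::LibCall;            // memmove, unknown or too-large length
}

int main() {
  XferRequest SmallCpy{false, true, true, 10, true};  // like the copy in @t4
  XferRequest BigCpy{false, true, true, 17, true};    // like the copy in @t2
  return selectMemTransfer(SmallCpy) == Lowering::Inlined &&
         selectMemTransfer(BigCpy) == Lowering::LibCall ? 0 : 1;
}

Note that memmove is screened out before the addresses are computed, matching the comment in the patch about not emitting dead ComputeAddress code for a case that is never inlined.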
test/CodeGen/ARM/fast-isel-intrinsic.ll

@@ -33,7 +33,7 @@ define void @t2() nounwind ssp {
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #10
; ARM: movw r2, #17
; ARM: str r0, [sp] @ 4-byte Spill
; ARM: mov r0, r1
; ARM: ldr r1, [sp] @ 4-byte Reload
@@ -43,11 +43,11 @@ define void @t2() nounwind ssp {
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #10
; THUMB: movs r2, #17
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memcpy
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
  ret void
}
@@ -75,4 +75,32 @@ define void @t3() nounwind ssp {
  ret void
}

define void @t4() nounwind ssp {
; ARM: t4
; ARM: ldr r0, LCPI3_0
; ARM: ldr r0, [r0]
; ARM: ldr r1, LCPI3_1
; ARM: ldr r1, [r1]
; ARM: ldr r2, [r1, #16]
; ARM: str r2, [r0, #4]
; ARM: ldr r2, [r1, #20]
; ARM: str r2, [r0, #8]
; ARM: ldrh r1, [r1, #24]
; ARM: strh r1, [r0, #12]
; ARM: bx lr
; THUMB: ldr.n r0, LCPI3_0
; THUMB: ldr r0, [r0]
; THUMB: ldr.n r1, LCPI3_1
; THUMB: ldr r1, [r1]
; THUMB: ldr r2, [r1, #16]
; THUMB: str r2, [r0, #4]
; THUMB: ldr r2, [r1, #20]
; THUMB: str r2, [r0, #8]
; THUMB: ldrh r1, [r1, #24]
; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
  ret void
}

declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
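The @t4 checks above are exactly the sequence the chunk loop produces for a 10-byte copy from @temp+16 to @temp+4. A small sketch (illustrative only, not LLVM code) of the offset bookkeeping that produces those operands:

// Replays the Dest.Offset/Src.Offset updates from ARMTryEmitSmallMemXfer for
// the specific copy in @t4: 10 bytes from @temp+16 to @temp+4.
#include <cstdint>
#include <iostream>

int main() {
  uint64_t Len = 10;      // constant length of the llvm.memcpy in @t4
  unsigned SrcOff = 16;   // source is @temp + 16
  unsigned DstOff = 4;    // destination is @temp + 4
  while (Len) {
    unsigned Size = Len >= 4 ? 4 : (Len >= 2 ? 2 : 1);
    std::cout << "copy " << Size << " byte(s): load [temp+" << SrcOff
              << "], store [temp+" << DstOff << "]\n";
    Len -= Size;
    SrcOff += Size;       // mirrors Src.Offset += Size
    DstOff += Size;       // mirrors Dest.Offset += Size
  }
  return 0;
}

That prints the (4, 16, 4), (4, 20, 8), (2, 24, 12) pattern behind the checked ldr/str pairs at [r1, #16]/[r0, #4] and [r1, #20]/[r0, #8], and the trailing ldrh/strh at [r1, #24]/[r0, #12].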