Implement support for x86 fast-isel of small fixed-size memcpys, which are generated
en masse for C++ PODs.  On my C++ test file, this cuts the fast-isel rejects by 10x
and shrinks the generated .s file by 5%.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129755 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2011-04-19 05:52:03 +00:00
parent ff009ad1e1
commit 832e494359
3 changed files with 106 additions and 58 deletions

View File

@ -486,8 +486,7 @@ bool FastISel::SelectCall(const User *I) {
if (!F) return false;
// Handle selected intrinsic function calls.
unsigned IID = F->getIntrinsicID();
switch (IID) {
switch (F->getIntrinsicID()) {
default: break;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
@ -552,64 +551,57 @@ bool FastISel::SelectCall(const User *I) {
}
case Intrinsic::eh_exception: {
EVT VT = TLI.getValueType(I->getType());
switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
default: break;
case TargetLowering::Expand: {
assert(FuncInfo.MBB->isLandingPad() &&
"Call to eh.exception not in landing pad!");
unsigned Reg = TLI.getExceptionAddressRegister();
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ResultReg).addReg(Reg);
UpdateValueMap(I, ResultReg);
return true;
}
}
break;
if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
break;
assert(FuncInfo.MBB->isLandingPad() &&
"Call to eh.exception not in landing pad!");
unsigned Reg = TLI.getExceptionAddressRegister();
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ResultReg).addReg(Reg);
UpdateValueMap(I, ResultReg);
return true;
}
case Intrinsic::eh_selector: {
EVT VT = TLI.getValueType(I->getType());
switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
default: break;
case TargetLowering::Expand: {
if (FuncInfo.MBB->isLandingPad())
AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
else {
if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
break;
if (FuncInfo.MBB->isLandingPad())
AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
else {
#ifndef NDEBUG
FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
#endif
// FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
if (Reg) FuncInfo.MBB->addLiveIn(Reg);
}
// FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
EVT SrcVT = TLI.getPointerTy();
const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ResultReg).addReg(Reg);
bool ResultRegIsKill = hasTrivialKill(I);
// Cast the register to the type of the selector.
if (SrcVT.bitsGT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
ResultReg, ResultRegIsKill);
else if (SrcVT.bitsLT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
if (ResultReg == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
UpdateValueMap(I, ResultReg);
return true;
if (Reg) FuncInfo.MBB->addLiveIn(Reg);
}
}
break;
unsigned Reg = TLI.getExceptionSelectorRegister();
EVT SrcVT = TLI.getPointerTy();
const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ResultReg).addReg(Reg);
bool ResultRegIsKill = hasTrivialKill(I);
// Cast the register to the type of the selector.
if (SrcVT.bitsGT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
ResultReg, ResultRegIsKill);
else if (SrcVT.bitsLT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
if (ResultReg == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
UpdateValueMap(I, ResultReg);
return true;
}
}

View File

@ -1325,6 +1325,52 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
case Intrinsic::memcpy: {
const MemCpyInst &MCI = cast<MemCpyInst>(I);
// Don't handle volatile or variable length memcpys.
if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
return false;
// Don't inline super long memcpys. We could lower these to a memcpy call,
// but we might as well bail out.
uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
bool i64Legal = TLI.isTypeLegal(MVT::i64);
if (Len > (i64Legal ? 32 : 16)) return false;
// Select addressing modes for the destination and source pointers.
X86AddressMode DestAM, SrcAM;
if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
!X86SelectAddress(MCI.getRawSource(), SrcAM))
return false;
// We don't care about alignment here since we just emit integer accesses.
while (Len) {
MVT VT;
if (Len >= 8 && i64Legal)
VT = MVT::i64;
else if (Len >= 4)
VT = MVT::i32;
else if (Len >= 2)
VT = MVT::i16;
else {
assert(Len == 1);
VT = MVT::i8;
}
unsigned Reg;
bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
RV &= X86FastEmitStore(VT, Reg, DestAM);
assert(RV && "Failed to emit load or store??");
unsigned Size = VT.getSizeInBits()/8;
Len -= Size;
DestAM.Disp += Size;
SrcAM.Disp += Size;
}
return true;
}
case Intrinsic::stackprotector: {
// Emit inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
@ -1335,17 +1381,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Grab the frame index.
X86AddressMode AM;
if (!X86SelectAddress(Slot, AM)) return false;
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
return true;
}
case Intrinsic::objectsize: {
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
// FIXME: This should be moved to generic code!
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
assert(CI && "Non-constant type in Intrinsic::objectsize?");
MVT VT;
if (!isTypeLegal(Ty, VT))
return false;
@ -1383,6 +1426,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow: {
// FIXME: Should fold immediates.
// Replace "add with overflow" intrinsics with an "add" instruction followed
// by a seto/setc instruction. Later on, when the "extractvalue"
// instructions are encountered, we use the fact that two registers were

View File

@ -170,3 +170,14 @@ entry:
; CHECK: callq
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy
define void @test15(i8* %a, i8* %b) nounwind {
; Constant-length memcpy: destination %a, source %b, length 4 (i64 operand).
; The trailing `i32 4` and `i1 false` operands are presumably the intrinsic's
; alignment and is-volatile flag -- confirm against the LangRef for this
; revision.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 4, i32 4, i1 false)
ret void
; The FileCheck expectations below require the function body to be exactly one
; 32-bit load from (%rsi) into %eax, one 32-bit store of %eax to (%rdi), and
; then the return, on consecutive output lines -- i.e. no call is emitted for
; the memcpy.
; CHECK: test15:
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: ret
}