From 832e4943598d821687cec79f512803c1ca03cff7 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Tue, 19 Apr 2011 05:52:03 +0000
Subject: [PATCH] Implement support for x86 fast-isel of small fixed-sized
 memcpys, which are generated en masse for C++ PODs. On my C++ test file,
 this cuts the fast-isel rejects by 10x and shrinks the generated .s file
 by 5%.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129755 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/FastISel.cpp | 98 ++++++++++++---------------
 lib/Target/X86/X86FastISel.cpp        | 55 +++++++++++++--
 test/CodeGen/X86/fast-isel-x86-64.ll  | 11 +++
 3 files changed, 106 insertions(+), 58 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 76e9a7cac2d..83894cd4a00 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -486,8 +486,7 @@ bool FastISel::SelectCall(const User *I) {
   if (!F) return false;

   // Handle selected intrinsic function calls.
-  unsigned IID = F->getIntrinsicID();
-  switch (IID) {
+  switch (F->getIntrinsicID()) {
   default: break;
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
@@ -552,64 +551,57 @@ bool FastISel::SelectCall(const User *I) {
   }
   case Intrinsic::eh_exception: {
     EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      assert(FuncInfo.MBB->isLandingPad() &&
-             "Call to eh.exception not in landing pad!");
-      unsigned Reg = TLI.getExceptionAddressRegister();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-      UpdateValueMap(I, ResultReg);
-      return true;
-    }
-    }
-    break;
+    if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+      break;
+
+    assert(FuncInfo.MBB->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+    UpdateValueMap(I, ResultReg);
+    return true;
   }
   case Intrinsic::eh_selector: {
     EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      if (FuncInfo.MBB->isLandingPad())
-        AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
-      else {
+    if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+      break;
+    if (FuncInfo.MBB->isLandingPad())
+      AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+    else {
 #ifndef NDEBUG
-        FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
+      FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
 #endif
-        // FIXME: Mark exception selector register as live in. Hack for PR1508.
-        unsigned Reg = TLI.getExceptionSelectorRegister();
-        if (Reg) FuncInfo.MBB->addLiveIn(Reg);
-      }
-
+      // FIXME: Mark exception selector register as live in. Hack for PR1508.
       unsigned Reg = TLI.getExceptionSelectorRegister();
-      EVT SrcVT = TLI.getPointerTy();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-
-      bool ResultRegIsKill = hasTrivialKill(I);
-
-      // Cast the register to the type of the selector.
-      if (SrcVT.bitsGT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
-                               ResultReg, ResultRegIsKill);
-      else if (SrcVT.bitsLT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
-                               ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
-      if (ResultReg == 0)
-        // Unhandled operand. Halt "fast" selection and bail.
-        return false;
-
-      UpdateValueMap(I, ResultReg);
-
-      return true;
+      if (Reg) FuncInfo.MBB->addLiveIn(Reg);
     }
-    }
-    break;
+
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    EVT SrcVT = TLI.getPointerTy();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+
+    bool ResultRegIsKill = hasTrivialKill(I);
+
+    // Cast the register to the type of the selector.
+    if (SrcVT.bitsGT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+                             ResultReg, ResultRegIsKill);
+    else if (SrcVT.bitsLT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+                             ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+    if (ResultReg == 0)
+      // Unhandled operand. Halt "fast" selection and bail.
+      return false;
+
+    UpdateValueMap(I, ResultReg);
+
+    return true;
   }
 }
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index c563a56f985..f4a26923b33 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1325,6 +1325,52 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
   // FIXME: Handle more intrinsics.
   switch (I.getIntrinsicID()) {
   default: return false;
+  case Intrinsic::memcpy: {
+    const MemCpyInst &MCI = cast<MemCpyInst>(I);
+    // Don't handle volatile or variable length memcpys.
+    if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+      return false;
+
+    // Don't inline super long memcpys. We could lower these to a memcpy call,
+    // but we might as well bail out.
+    uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+    bool i64Legal = TLI.isTypeLegal(MVT::i64);
+    if (Len > (i64Legal ? 32 : 16)) return false;
+
+    // Get the address of the dest and source addresses.
+    X86AddressMode DestAM, SrcAM;
+    if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+        !X86SelectAddress(MCI.getRawSource(), SrcAM))
+      return false;
+
+    // We don't care about alignment here since we just emit integer accesses.
+    while (Len) {
+      MVT VT;
+      if (Len >= 8 && i64Legal)
+        VT = MVT::i64;
+      else if (Len >= 4)
+        VT = MVT::i32;
+      else if (Len >= 2)
+        VT = MVT::i16;
+      else {
+        assert(Len == 1);
+        VT = MVT::i8;
+      }
+
+      unsigned Reg;
+      bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+      RV &= X86FastEmitStore(VT, Reg, DestAM);
+      assert(RV && "Failed to emit load or store??");
+
+      unsigned Size = VT.getSizeInBits()/8;
+      Len -= Size;
+      DestAM.Disp += Size;
+      SrcAM.Disp += Size;
+    }
+
+    return true;
+  }
+
   case Intrinsic::stackprotector: {
     // Emit inline code to store the stack guard onto the stack.
     EVT PtrTy = TLI.getPointerTy();
@@ -1335,17 +1381,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
     // Grab the frame index.
     X86AddressMode AM;
     if (!X86SelectAddress(Slot, AM)) return false;
-
     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
     return true;
   }
   case Intrinsic::objectsize: {
-    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+    // FIXME: This should be moved to generic code!
+    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
     const Type *Ty = I.getCalledFunction()->getReturnType();
-
-    assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
+
     MVT VT;
     if (!isTypeLegal(Ty, VT))
       return false;
@@ -1383,6 +1426,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
   }
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow: {
+    // FIXME: Should fold immediates.
+
     // Replace "add with overflow" intrinsics with an "add" instruction followed
     // by a seto/setc instruction. Later on, when the "extractvalue"
     // instructions are encountered, we use the fact that two registers were
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index 508af25772c..1770e732230 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -170,3 +170,14 @@ entry:
 ; CHECK: callq
 }

+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy
+define void @test15(i8* %a, i8* %b) nounwind {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 4, i32 4, i1 false)
+  ret void
+; CHECK: test15:
+; CHECK-NEXT: movl (%rsi), %eax
+; CHECK-NEXT: movl %eax, (%rdi)
+; CHECK-NEXT: ret
+}
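
A note on the lowering strategy: the new Intrinsic::memcpy case never emits a loop or a libcall; it statically unrolls the copy into at most a handful of integer load/store pairs. The standalone C++ sketch below is not part of this patch or of the LLVM tree; the name emitChunks and the printf tracing are illustrative only. It reproduces just the size-selection logic so the chunk sequence for a given length can be inspected:

#include <cstdint>
#include <cstdio>

// Mirrors the chunking loop in the new Intrinsic::memcpy case: prefer
// 8-byte chunks when i64 is legal (x86-64), then 4-, 2-, and 1-byte chunks.
static void emitChunks(uint64_t Len, bool i64Legal) {
  // Same bail-out threshold as the patch: don't inline long copies.
  if (Len > (i64Legal ? 32u : 16u)) {
    std::printf("rejected: %llu bytes is too long to inline\n",
                (unsigned long long)Len);
    return;
  }
  uint64_t Disp = 0; // stands in for DestAM.Disp/SrcAM.Disp
  while (Len) {
    unsigned Size;
    if (Len >= 8 && i64Legal)
      Size = 8; // one i64 load/store pair
    else if (Len >= 4)
      Size = 4; // one i32 pair
    else if (Len >= 2)
      Size = 2; // one i16 pair
    else
      Size = 1; // one i8 pair
    std::printf("  i%u copy at offset %llu\n", Size * 8,
                (unsigned long long)Disp);
    Len -= Size;
    Disp += Size; // both address modes advance in lockstep
  }
}

int main() {
  emitChunks(4, true);   // test15's copy: a single i32 pair
  emitChunks(7, true);   // i32 + i16 + i8
  emitChunks(20, false); // no i64 on a 32-bit target: five i32 pairs
  return 0;
}

For Len = 4 on x86-64 this selects a single i32 chunk, which is exactly the movl (%rsi), %eax / movl %eax, (%rdi) pair that test15 checks for.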