diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3237a103609..eeea18e1d8c 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -111,6 +111,8 @@ private:
   bool X86VisitIntrinsicCall(const IntrinsicInst &I);
   bool X86SelectCall(const Instruction *I);
 
+  bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
   const X86InstrInfo *getInstrInfo() const {
     return getTargetMachine()->getInstrInfo();
   }
@@ -1333,20 +1335,44 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
   case Intrinsic::memcpy: {
     const MemCpyInst &MCI = cast<MemCpyInst>(I);
     // Don't handle volatile or variable length memcpys.
-    if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+    if (MCI.isVolatile())
       return false;
-    uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+
+    if (isa<ConstantInt>(MCI.getLength())) {
+      // Small memcpys are common enough that we want to do them
+      // without a call if possible.
+      uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+      if (IsMemcpySmall(Len)) {
+        X86AddressMode DestAM, SrcAM;
+        if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+            !X86SelectAddress(MCI.getRawSource(), SrcAM))
+          return false;
+        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+        return true;
+      }
+    }
 
-    // Get the address of the dest and source addresses.
-    X86AddressMode DestAM, SrcAM;
-    if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
-        !X86SelectAddress(MCI.getRawSource(), SrcAM))
+    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+    if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
       return false;
 
-    return TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+    if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+      return false;
+
+    return DoSelectCall(&I, "memcpy");
   }
+  case Intrinsic::memset: {
+    const MemSetInst &MSI = cast<MemSetInst>(I);
+
+    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+    if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+      return false;
+
+    if (MSI.getDestAddressSpace() > 255)
+      return false;
+
+    return DoSelectCall(&I, "memset");
+  }
   case Intrinsic::stackprotector: {
     // Emit inline code to store the stack guard onto the stack.
     EVT PtrTy = TLI.getPointerTy();
@@ -1437,6 +1463,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
     return X86VisitIntrinsicCall(*II);
 
+  return DoSelectCall(I, 0);
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+  const CallInst *CI = cast<CallInst>(I);
+  const Value *Callee = CI->getCalledValue();
+
   // Handle only C and fastcc calling conventions for now.
   ImmutableCallSite CS(CI);
   CallingConv::ID CC = CS.getCallingConv();
@@ -1498,6 +1532,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   ArgFlags.reserve(CS.arg_size());
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
+    // If we're lowering a mem intrinsic instead of a regular call, skip the
+    // last two arguments, which should not be passed to the underlying functions.
+    if (MemIntName && e-i <= 2)
+      break;
     Value *ArgVal = *i;
     ISD::ArgFlagsTy Flags;
     unsigned AttrInd = i - CS.arg_begin() + 1;
@@ -1744,8 +1782,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     }
 
-    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
-      .addGlobalAddress(GV, 0, OpFlags);
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+    if (MemIntName)
+      MIB.addExternalSymbol(MemIntName);
+    else
+      MIB.addGlobalAddress(GV, 0, OpFlags);
   }
 
   // Add an implicit use GOT pointer in EBX.
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 7e57965bb5b..2fbe4e27bf0 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -29,3 +29,27 @@ define void @test2(%struct.s* %d) nounwind {
 ; CHECK: movl 8(%eax)
 ; CHECK: movl {{.*}}, 8(%esp)
 }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @test3(i8* %a) {
+  call void @llvm.memset.p0i8.i32(i8* %a, i8 0, i32 100, i32 1, i1 false)
+  ret void
+; CHECK: test3:
+; CHECK: movl {{.*}}, (%esp)
+; CHECK: movl $0, 4(%esp)
+; CHECK: movl $100, 8(%esp)
+; CHECK: calll {{.*}}memset
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @test4(i8* %a, i8* %b) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 100, i32 1, i1 false)
+  ret void
+; CHECK: test4:
+; CHECK: movl {{.*}}, (%esp)
+; CHECK: movl {{.*}}, 4(%esp)
+; CHECK: movl $100, 8(%esp)
+; CHECK: calll {{.*}}memcpy
+}
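
Note for reviewers: the new tests only cover constant lengths (100 bytes, above
the small-copy threshold). A variable-length memcpy takes the same DoSelectCall
path, since the IsMemcpySmall fast path is only reachable for a ConstantInt
length. A minimal sketch of such a test case follows; it is not part of the
patch, and the function name and CHECK line are illustrative, assuming the same
i686 fast-isel RUN line already used by fast-isel-call.ll:

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind

; Variable length: no ConstantInt to feed IsMemcpySmall, so fast-isel falls
; through to DoSelectCall and emits a libcall. Only the first three operands
; (dest, src, len) are lowered as arguments; the trailing alignment and
; volatile operands are dropped by the "MemIntName && e-i <= 2" check.
define void @test5(i8* %a, i8* %b, i32 %n) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 %n, i32 1, i1 false)
  ret void
; CHECK: test5:
; CHECK: calll {{.*}}memcpy
}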