diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ff4a283b583..9b985f94ad4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2525,16 +2525,29 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (RegInfo->needsStackRealignment(MF)) return false; - // Do not sibcall optimize vararg calls unless the call site is not passing - // any arguments. - if (isVarArg && !Outs.empty()) - return false; - // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) return false; + // Do not sibcall optimize vararg calls unless all arguments are passed via + // registers. + if (isVarArg && !Outs.empty()) { + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + + // Allocate shadow area for Win64 + if (Subtarget->isTargetWin64()) { + CCInfo.AllocateStack(32, 8); + } + + CCInfo.AnalyzeCallOperands(Outs, CC_X86); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) + if (!ArgLocs[i].isRegLoc()) + return false; + } + // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack. // Therefore if it's not used by the call it is not safe to optimize this into // a sibcall. 
diff --git a/test/CodeGen/X86/bool-zext.ll b/test/CodeGen/X86/bool-zext.ll index d2c30c64f23..e04770e267d 100644 --- a/test/CodeGen/X86/bool-zext.ll +++ b/test/CodeGen/X86/bool-zext.ll @@ -2,7 +2,7 @@ ; CHECK: @bar1 ; CHECK: movzbl -; CHECK: callq +; CHECK: jmp define void @bar1(i1 zeroext %v1) nounwind ssp { entry: %conv = zext i1 %v1 to i32 @@ -12,7 +12,7 @@ entry: ; CHECK: @bar2 ; CHECK-NOT: movzbl -; CHECK: callq +; CHECK: jmp define void @bar2(i8 zeroext %v1) nounwind ssp { entry: %conv = zext i8 %v1 to i32 diff --git a/test/CodeGen/X86/vararg_tailcall.ll b/test/CodeGen/X86/vararg_tailcall.ll new file mode 100644 index 00000000000..1187afcd7a2 --- /dev/null +++ b/test/CodeGen/X86/vararg_tailcall.ll @@ -0,0 +1,85 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00" +@sel = external global i8* +@sel3 = external global i8* +@sel4 = external global i8* +@sel5 = external global i8* +@sel6 = external global i8* +@sel7 = external global i8* + +; CHECK: @foo +; CHECK: jmp +define void @foo(i64 %arg) nounwind optsize ssp noredzone { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone + ret void +} + +declare i32 @printf(i8*, ...) optsize noredzone + +; CHECK: @bar +; CHECK: jmp +define void @bar(i64 %arg) nounwind optsize ssp noredzone { +entry: + tail call void @bar2(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone + ret void +} + +declare void @bar2(i8*, i64) optsize noredzone + +; CHECK: @foo2 +; CHECK: jmp +define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone { +entry: + %tmp1 = load i8** @sel, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone + ret i8* %call +} + +declare i8* @x2(i8*, i8*, ...) 
optsize noredzone + +; CHECK: @foo6 +; CHECK: jmp +define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone { +entry: + %tmp2 = load i8** @sel3, align 8, !tbaa !0 + %tmp3 = load i8** @sel4, align 8, !tbaa !0 + %tmp4 = load i8** @sel5, align 8, !tbaa !0 + %tmp5 = load i8** @sel6, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone + ret i8* %call +} + +declare i8* @x3(i8*, i8*, i8*, ...) optsize noredzone + +; CHECK: @foo7 +; CHECK: callq +define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone { +entry: + %tmp2 = load i8** @sel3, align 8, !tbaa !0 + %tmp3 = load i8** @sel4, align 8, !tbaa !0 + %tmp4 = load i8** @sel5, align 8, !tbaa !0 + %tmp5 = load i8** @sel6, align 8, !tbaa !0 + %tmp6 = load i8** @sel7, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone + ret i8* %call +} + +declare i8* @x7(i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) optsize noredzone + +; CHECK: @foo8 +; CHECK: callq +define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone { +entry: + %tmp2 = load i8** @sel3, align 8, !tbaa !0 + %tmp3 = load i8** @sel4, align 8, !tbaa !0 + %tmp4 = load i8** @sel5, align 8, !tbaa !0 + %tmp5 = load i8** @sel6, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone + ret i8* %call +} + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}