diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index eb4e5bde3df..0df257da4a4 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1055,20 +1055,19 @@ void Verifier::visitFunction(const Function &F) {
           "Attribute 'builtin' can only be applied to a callsite.", &F);
 
   // Check that this function meets the restrictions on this calling convention.
+  // Sometimes varargs is used for perfectly forwarding thunks, so some of these
+  // restrictions can be lifted.
   switch (F.getCallingConv()) {
   default:
-    break;
   case CallingConv::C:
     break;
   case CallingConv::Fast:
   case CallingConv::Cold:
-  case CallingConv::X86_FastCall:
-  case CallingConv::X86_ThisCall:
   case CallingConv::Intel_OCL_BI:
   case CallingConv::PTX_Kernel:
   case CallingConv::PTX_Device:
-    Assert1(!F.isVarArg(),
-            "Varargs functions must have C calling conventions!", &F);
+    Assert1(!F.isVarArg(), "Calling convention does not support varargs or "
+                           "perfect forwarding!", &F);
     break;
   }
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7c0bfeef947..0972a034d90 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3605,23 +3605,18 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 /// own arguments. Callee pop is necessary to support tail calls.
 bool X86::isCalleePop(CallingConv::ID CallingConv,
                       bool is64Bit, bool IsVarArg, bool TailCallOpt) {
-  if (IsVarArg)
-    return false;
-
   switch (CallingConv) {
   default:
     return false;
   case CallingConv::X86_StdCall:
-    return !is64Bit;
   case CallingConv::X86_FastCall:
-    return !is64Bit;
   case CallingConv::X86_ThisCall:
     return !is64Bit;
   case CallingConv::Fast:
-    return TailCallOpt;
   case CallingConv::GHC:
-    return TailCallOpt;
   case CallingConv::HiPE:
+    if (IsVarArg)
+      return false;
     return TailCallOpt;
   }
 }
diff --git a/test/CodeGen/X86/vararg-callee-cleanup.ll b/test/CodeGen/X86/vararg-callee-cleanup.ll
new file mode 100644
index 00000000000..2dcf319a208
--- /dev/null
+++ b/test/CodeGen/X86/vararg-callee-cleanup.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=i686-pc-windows < %s | FileCheck %s
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+
+declare x86_thiscallcc void @thiscall_thunk(i8* %this, ...)
+define i32 @call_varargs_thiscall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
+  call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
+  call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
+  %t1 = add i32 %b, %c
+  %r = add i32 %t1, %d
+  ret i32 %r
+}
+
+; CHECK: _call_varargs_thiscall_thunk:
+; CHECK: calll _thiscall_thunk
+; CHECK-NEXT: subl $8, %esp
+
+; We don't mangle the argument size into variadic callee cleanup functions.
+
+declare x86_stdcallcc void @stdcall_thunk(i8* %this, ...)
+define i32 @call_varargs_stdcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
+  call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
+  call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
+  %t1 = add i32 %b, %c
+  %r = add i32 %t1, %d
+  ret i32 %r
+}
+
+; CHECK: _call_varargs_stdcall_thunk:
+; CHECK: calll _stdcall_thunk{{$}}
+; CHECK-NEXT: subl $12, %esp
+
+declare x86_fastcallcc void @fastcall_thunk(i8* %this, ...)
+define i32 @call_varargs_fastcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
+  call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+  call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+  %t1 = add i32 %b, %c
+  %r = add i32 %t1, %d
+  ret i32 %r
+}
+
+; CHECK: _call_varargs_fastcall_thunk:
+; CHECK: calll @fastcall_thunk{{$}}
+; CHECK-NEXT: subl $4, %esp
+
+; If you actually return from such a thunk, it will only pop the non-variadic
+; portion of the arguments, which is different from what the callee passes.
+
+define x86_stdcallcc void @varargs_stdcall_return(i32, i32, ...) {
+  ret void
+}
+
+; CHECK: _varargs_stdcall_return:
+; CHECK: retl $8
diff --git a/test/Verifier/musttail-valid.ll b/test/Verifier/musttail-valid.ll
index 815d77a13e3..bdc0c8cf7fd 100644
--- a/test/Verifier/musttail-valid.ll
+++ b/test/Verifier/musttail-valid.ll
@@ -14,3 +14,26 @@ define i32* @similar_ret_ptrty() {
   %w = bitcast i8* %v to i32*
   ret i32* %w
 }
+
+declare x86_thiscallcc void @varargs_thiscall(i8*, ...)
+define x86_thiscallcc void @varargs_thiscall_thunk(i8* %this, ...) {
+  musttail call x86_thiscallcc void (i8*, ...)* @varargs_thiscall(i8* %this, ...)
+  ret void
+}
+
+declare x86_fastcallcc void @varargs_fastcall(i8*, ...)
+define x86_fastcallcc void @varargs_fastcall_thunk(i8* %this, ...) {
+  musttail call x86_fastcallcc void (i8*, ...)* @varargs_fastcall(i8* %this, ...)
+  ret void
+}
+
+define x86_thiscallcc void @varargs_thiscall_unreachable(i8* %this, ...) {
+  unreachable
+}
+
+define x86_thiscallcc void @varargs_thiscall_ret_unreachable(i8* %this, ...) {
+  musttail call x86_thiscallcc void (i8*, ...)* @varargs_thiscall(i8* %this, ...)
+  ret void
+bb1:
+  ret void
+}