diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index d9d1df0b8e6..89531dcfea6 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -729,6 +729,8 @@ function. The operand fields are: * ``ccc``: code 0 * ``fastcc``: code 8 * ``coldcc``: code 9 + * ``webkit_jscc``: code 12 + * ``anyregcc``: code 13 * ``x86_stdcallcc``: code 64 * ``x86_fastcallcc``: code 65 * ``arm_apcscc``: code 66 diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 6185c1cd387..e733420e096 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -620,14 +620,15 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; -def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, +def CSR_64_MostRegs : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, RBP, (sequence "XMM%u", 0, 15))>; -def CSR_AllRegs_64 : CalleeSavedRegs<(add CSR_MostRegs_64, RAX, RSP, - (sequence "XMM%u", 16, 31), - (sequence "YMM%u", 0, 31), - (sequence "ZMM%u", 0, 31))>; +def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX, RSP, + (sequence "XMM%u", 16, 31))>; +def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, RSP, + (sequence "YMM%u", 0, 31)), + (sequence "XMM%u", 0, 15))>; // Standard C + YMM6-15 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index bdaf4ef58fc..220dc433fef 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -234,17 +234,18 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const uint16_t * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + bool HasAVX = TM.getSubtarget().hasAVX(); + bool HasAVX512 = TM.getSubtarget().hasAVX512(); + switch (MF->getFunction()->getCallingConv()) { case CallingConv::GHC: case CallingConv::HiPE: return CSR_NoRegs_SaveList; - case CallingConv::AnyReg: - return CSR_AllRegs_64_SaveList; - + if (HasAVX) + return CSR_64_AllRegs_AVX_SaveList; + return CSR_64_AllRegs_SaveList; case CallingConv::Intel_OCL_BI: { - bool HasAVX = TM.getSubtarget().hasAVX(); - bool HasAVX512 = TM.getSubtarget().hasAVX512(); if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; if (HasAVX512 && Is64Bit) @@ -257,12 +258,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_64_Intel_OCL_BI_SaveList; break; } - case CallingConv::Cold: if (Is64Bit) - return CSR_MostRegs_64_SaveList; + return CSR_64_MostRegs_SaveList; break; - default: break; } @@ -285,7 +284,15 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { bool HasAVX = TM.getSubtarget().hasAVX(); bool HasAVX512 = TM.getSubtarget().hasAVX512(); - if (CC == CallingConv::Intel_OCL_BI) { + switch (CC) { + case CallingConv::GHC: + case CallingConv::HiPE: + return CSR_NoRegs_RegMask; + case CallingConv::AnyReg: + if (HasAVX) + return CSR_64_AllRegs_AVX_RegMask; + return CSR_64_AllRegs_RegMask; + case CallingConv::Intel_OCL_BI: { if (IsWin64 && HasAVX512) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; if (Is64Bit && HasAVX512) @@ -297,17 +304,20 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (!HasAVX && !IsWin64 && Is64Bit) return CSR_64_Intel_OCL_BI_RegMask; } - if (CC == CallingConv::GHC || CC == CallingConv::HiPE) - return CSR_NoRegs_RegMask; - if (CC == CallingConv::AnyReg) - return CSR_AllRegs_64_RegMask; - if (!Is64Bit) - return CSR_32_RegMask; - if (CC == CallingConv::Cold) - return CSR_MostRegs_64_RegMask; - if (IsWin64) - return CSR_Win64_RegMask; - return CSR_64_RegMask; + case CallingConv::Cold: + if (Is64Bit) + return CSR_64_MostRegs_RegMask; + break; + default: + break; + } + + if (Is64Bit) { + if (IsWin64) + return CSR_Win64_RegMask; + return CSR_64_RegMask; + } + return CSR_32_RegMask; } const uint32_t* diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll index 0677023174e..a00b9c433f8 100644 --- a/test/CodeGen/X86/anyregcc.ll +++ b/test/CodeGen/X86/anyregcc.ll @@ -1,4 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck --check-prefix=SSE %s +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s + ; Stackmap Header: no constants - 6 callsites ; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps @@ -336,5 +339,105 @@ entry: ret i64 %result } +; Make sure all regs are spilled +define anyregcc void @anyregcc1() { +entry: +;SSE-LABEL: anyregcc1 +;SSE: pushq %rax +;SSE: pushq %rbp +;SSE: pushq %r15 +;SSE: pushq %r14 +;SSE: pushq %r13 +;SSE: pushq %r12 +;SSE: pushq %r11 +;SSE: pushq %r10 +;SSE: pushq %r9 +;SSE: pushq %r8 +;SSE: pushq %rdi +;SSE: pushq %rsi +;SSE: pushq %rdx +;SSE: pushq %rcx +;SSE: pushq %rbx +;SSE: movaps %xmm15 +;SSE-NEXT: movaps %xmm14 +;SSE-NEXT: movaps %xmm13 +;SSE-NEXT: movaps %xmm12 +;SSE-NEXT: movaps %xmm11 +;SSE-NEXT: movaps %xmm10 +;SSE-NEXT: movaps %xmm9 +;SSE-NEXT: movaps %xmm8 +;SSE-NEXT: movaps %xmm7 +;SSE-NEXT: movaps %xmm6 +;SSE-NEXT: movaps %xmm5 +;SSE-NEXT: movaps %xmm4 +;SSE-NEXT: movaps %xmm3 +;SSE-NEXT: movaps %xmm2 +;SSE-NEXT: movaps %xmm1 +;SSE-NEXT: movaps %xmm0 +;AVX-LABEL:anyregcc1 +;AVX: pushq %rax +;AVX: pushq %rbp +;AVX: pushq %r15 +;AVX: pushq %r14 +;AVX: pushq %r13 +;AVX: pushq %r12 +;AVX: pushq %r11 +;AVX: pushq %r10 +;AVX: pushq %r9 +;AVX: pushq %r8 +;AVX: pushq %rdi +;AVX: pushq %rsi +;AVX: pushq %rdx +;AVX: pushq %rcx +;AVX: pushq %rbx +;AVX: vmovups %ymm15 +;AVX-NEXT: vmovups %ymm14 +;AVX-NEXT: vmovups %ymm13 +;AVX-NEXT: vmovups %ymm12 +;AVX-NEXT: vmovups %ymm11 +;AVX-NEXT: vmovups %ymm10 +;AVX-NEXT: vmovups %ymm9 +;AVX-NEXT: vmovups %ymm8 +;AVX-NEXT: vmovups %ymm7 +;AVX-NEXT: vmovups %ymm6 +;AVX-NEXT: vmovups %ymm5 +;AVX-NEXT: vmovups %ymm4 +;AVX-NEXT: vmovups %ymm3 +;AVX-NEXT: vmovups %ymm2 +;AVX-NEXT: vmovups %ymm1 +;AVX-NEXT: vmovups %ymm0 + call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() + ret void +} + +; Make sure we don't spill any XMMs/YMMs +declare anyregcc void @foo() +define void @anyregcc2() { +entry: +;SSE-LABEL: anyregcc2 +;SSE-NOT: movaps %xmm +;AVX-LABEL: anyregcc2 +;AVX-NOT: vmovups %ymm + %a0 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind + %a1 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind + %a2 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind + %a3 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind + %a4 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind + %a5 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind + %a6 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind + %a7 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind + %a8 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind + %a9 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind + %a10 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind + %a11 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind + %a12 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind + %a13 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind + %a14 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind + %a15 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind + call anyregcc void @foo() + call void asm sideeffect "", "{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3, <2 x double> %a4, <2 x double> %a5, <2 x double> %a6, <2 x double> %a7, <2 x double> %a8, <2 x double> %a9, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15) + ret void +} + declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)