From ceaf829339bcd0719a43b3e8c22eaab7a973d37d Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Fri, 17 Jan 2014 19:47:03 +0000 Subject: [PATCH] Add two new calling conventions for runtime calls This patch adds two new target-independent calling conventions for runtime calls - PreserveMost and PreserveAll. The target-specific implementation for X86-64 is defined as following: - Arguments are passed as for the default C calling convention - The same applies for the return value(s) - PreserveMost preserves all GPRs - except R11 - PreserveAll preserves all GPRs and all XMMs/YMMs - except R11 Reviewed by Lang and Philip git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199508 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/BitCodeFormat.rst | 2 + docs/LangRef.rst | 43 +++++++++++ include/llvm/IR/CallingConv.h | 8 ++ lib/AsmParser/LLLexer.cpp | 2 + lib/AsmParser/LLParser.cpp | 4 + lib/AsmParser/LLToken.h | 1 + lib/IR/AsmWriter.cpp | 2 + lib/Target/X86/X86CallingConv.td | 10 +++ lib/Target/X86/X86RegisterInfo.cpp | 12 +++ test/CodeGen/X86/preserve_allcc64.ll | 104 ++++++++++++++++++++++++++ test/CodeGen/X86/preserve_mostcc64.ll | 86 +++++++++++++++++++++ 11 files changed, 274 insertions(+) create mode 100644 test/CodeGen/X86/preserve_allcc64.ll create mode 100644 test/CodeGen/X86/preserve_mostcc64.ll diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index 9363a62d080..38b4010483a 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -739,6 +739,8 @@ function. The operand fields are: * ``coldcc``: code 9 * ``webkit_jscc``: code 12 * ``anyregcc``: code 13 + * ``preserve_mostcc``: code 14 + * ``preserve_allcc``: code 15 * ``x86_stdcallcc``: code 64 * ``x86_fastcallcc``: code 65 * ``arm_apcscc``: code 66 diff --git a/docs/LangRef.rst b/docs/LangRef.rst index d450b2a465f..39948f4b083 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -359,6 +359,49 @@ added in the future: allocated. 
This can currently only be used with calls to llvm.experimental.patchpoint because only this intrinsic records the location of its arguments in a side table. See :doc:`StackMaps`. +"``preserve_mostcc``" - The `PreserveMost` calling convention + This calling convention attempts to make the code in the caller as little + intrusive as possible. This calling convention behaves identical to the `C` + calling convention on how arguments and return values are passed, but it + uses a different set of caller/callee-saved registers. This alleviates the + burden of saving and recovering a large register set before and after the + call in the caller. + + - On X86-64 the callee preserves all general purpose registers, except for + R11. R11 can be used as a scratch register. Floating-point registers + (XMMs/YMMs) are not preserved and need to be saved by the caller. + + The idea behind this convention is to support calls to runtime functions + that have a hot path and a cold path. The hot path is usually a small piece + of code that doesn't use many registers. The cold path might need to call out to + another function and therefore only needs to preserve the caller-saved + registers, which haven't already been saved by the caller. + + This calling convention will be used by a future version of the ObjectiveC + runtime and should therefore still be considered experimental at this time. + Although this convention was created to optimize certain runtime calls to + the ObjectiveC runtime, it is not limited to this runtime and might be used + by other runtimes in the future too. The current implementation only + supports X86-64, but the intention is to support more architectures in the + future. +"``preserve_allcc``" - The `PreserveAll` calling convention + This calling convention attempts to make the code in the caller even less + intrusive than the `PreserveMost` calling convention. 
This calling + convention also behaves identical to the `C` calling convention on how + arguments and return values are passed, but it uses a different set of + caller/callee-saved registers. This removes the burden of saving and + recovering a large register set before and after the call in the caller. + + - On X86-64 the callee preserves all general purpose registers, except for + R11. R11 can be used as a scratch register. Furthermore it also preserves + all floating-point registers (XMMs/YMMs). + + The idea behind this convention is to support calls to runtime functions + that don't need to call out to any other functions. + + This calling convention, like the `PreserveMost` calling convention, will be + used by a future version of the ObjectiveC runtime and should be considered + experimental at this time. "``cc ``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 4437af25574..1eaf4f7f469 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -58,6 +58,14 @@ namespace CallingConv { // stackmap and patchpoint intrinsics). AnyReg = 13, + // PreserveMost - Calling convention for runtime calls that preserves most + // registers. + PreserveMost = 14, + + // PreserveAll - Calling convention for runtime calls that preserves + // (almost) all registers. + PreserveAll = 15, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. 
FirstTargetCC = 64, diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index ca3b7902b03..7fb8032ca99 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -563,6 +563,8 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_64_win64cc); KEYWORD(webkit_jscc); KEYWORD(anyregcc); + KEYWORD(preserve_mostcc); + KEYWORD(preserve_allcc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index d4d6f7cee14..a1b5f9946c9 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1369,6 +1369,8 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'x86_64_win64cc' /// ::= 'webkit_jscc' /// ::= 'anyregcc' +/// ::= 'preserve_mostcc' +/// ::= 'preserve_allcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { @@ -1393,6 +1395,8 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break; case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break; + case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break; + case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break; case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 5a6866dd149..50318500bf4 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -92,6 +92,7 @@ namespace lltok { kw_spir_kernel, kw_spir_func, kw_x86_64_sysvcc, kw_x86_64_win64cc, kw_webkit_jscc, kw_anyregcc, + kw_preserve_mostcc, kw_preserve_allcc, // Attributes: kw_attributes, diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index eba05c5f28e..73b407ea1b4 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -73,6 +73,8 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::Cold: Out << "coldcc"; break; case CallingConv::WebKit_JS: 
Out << "webkit_jscc"; break; case CallingConv::AnyReg: Out << "anyregcc"; break; + case CallingConv::PreserveMost: Out << "preserve_mostcc"; break; + case CallingConv::PreserveAll: Out << "preserve_allcc"; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e733420e096..5b51a1e8e0b 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -620,6 +620,16 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; +// All GPRs - except r11 +def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, + R8, R9, R10, RSP)>; + +// All registers - except r11 +def CSR_64_RT_AllRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs, + (sequence "XMM%u", 0, 15))>; +def CSR_64_RT_AllRegs_AVX : CalleeSavedRegs<(add CSR_64_RT_MostRegs, + (sequence "YMM%u", 0, 15))>; + def CSR_64_MostRegs : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, RBP, (sequence "XMM%u", 0, 15))>; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 220dc433fef..8a65dc1f2f7 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -245,6 +245,12 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (HasAVX) return CSR_64_AllRegs_AVX_SaveList; return CSR_64_AllRegs_SaveList; + case CallingConv::PreserveMost: + return CSR_64_RT_MostRegs_SaveList; + case CallingConv::PreserveAll: + if (HasAVX) + return CSR_64_RT_AllRegs_AVX_SaveList; + return CSR_64_RT_AllRegs_SaveList; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; @@ -292,6 +298,12 @@ 
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (HasAVX) return CSR_64_AllRegs_AVX_RegMask; return CSR_64_AllRegs_RegMask; + case CallingConv::PreserveMost: + return CSR_64_RT_MostRegs_RegMask; + case CallingConv::PreserveAll: + if (HasAVX) + return CSR_64_RT_AllRegs_AVX_RegMask; + return CSR_64_RT_AllRegs_RegMask; case CallingConv::Intel_OCL_BI: { if (IsWin64 && HasAVX512) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; diff --git a/test/CodeGen/X86/preserve_allcc64.ll b/test/CodeGen/X86/preserve_allcc64.ll new file mode 100644 index 00000000000..545cd36ab95 --- /dev/null +++ b/test/CodeGen/X86/preserve_allcc64.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefix=SSE %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s + +define preserve_allcc void @preserve_allcc1() nounwind { +entry: +;SSE-LABEL: preserve_allcc1 +;SSE: pushq %r10 +;SSE-NEXT: pushq %r9 +;SSE-NEXT: pushq %r8 +;SSE-NEXT: pushq %rdi +;SSE-NEXT: pushq %rsi +;SSE-NEXT: pushq %rdx +;SSE-NEXT: pushq %rcx +;SSE-NEXT: pushq %rax +;SSE-NEXT: pushq %rbp +;SSE-NEXT: pushq %r15 +;SSE-NEXT: pushq %r14 +;SSE-NEXT: pushq %r13 +;SSE-NEXT: pushq %r12 +;SSE-NEXT: pushq %rbx +;SSE: movaps %xmm15 +;SSE-NEXT: movaps %xmm14 +;SSE-NEXT: movaps %xmm13 +;SSE-NEXT: movaps %xmm12 +;SSE-NEXT: movaps %xmm11 +;SSE-NEXT: movaps %xmm10 +;SSE-NEXT: movaps %xmm9 +;SSE-NEXT: movaps %xmm8 +;SSE-NEXT: movaps %xmm7 +;SSE-NEXT: movaps %xmm6 +;SSE-NEXT: movaps %xmm5 +;SSE-NEXT: movaps %xmm4 +;SSE-NEXT: movaps %xmm3 +;SSE-NEXT: movaps %xmm2 +;SSE-NEXT: movaps %xmm1 +;SSE-NEXT: movaps %xmm0 +;AVX-LABEL: preserve_allcc1 +;AVX: pushq %r10 +;AVX-NEXT: pushq %r9 +;AVX-NEXT: pushq %r8 +;AVX-NEXT: pushq %rdi +;AVX-NEXT: pushq %rsi +;AVX-NEXT: pushq %rdx +;AVX-NEXT: pushq %rcx +;AVX-NEXT: pushq %rax +;AVX-NEXT: pushq %rbp +;AVX-NEXT: pushq %r15 +;AVX-NEXT: pushq %r14 +;AVX-NEXT: pushq %r13 +;AVX-NEXT: pushq %r12 +;AVX-NEXT: 
pushq %rbx +;AVX: vmovups %ymm15 +;AVX-NEXT: vmovups %ymm14 +;AVX-NEXT: vmovups %ymm13 +;AVX-NEXT: vmovups %ymm12 +;AVX-NEXT: vmovups %ymm11 +;AVX-NEXT: vmovups %ymm10 +;AVX-NEXT: vmovups %ymm9 +;AVX-NEXT: vmovups %ymm8 +;AVX-NEXT: vmovups %ymm7 +;AVX-NEXT: vmovups %ymm6 +;AVX-NEXT: vmovups %ymm5 +;AVX-NEXT: vmovups %ymm4 +;AVX-NEXT: vmovups %ymm3 +;AVX-NEXT: vmovups %ymm2 +;AVX-NEXT: vmovups %ymm1 +;AVX-NEXT: vmovups %ymm0 + call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() + ret void +} + +; Make sure only R11 is saved before the call +declare preserve_allcc void @bar(i64, i64, double, double) +define void @preserve_allcc2() nounwind { +entry: +;SSE-LABEL: preserve_allcc2 +;SSE: movq %r11, [[REG:%[a-z0-9]+]] +;SSE-NOT: movaps %xmm +;SSE: movq [[REG]], %r11 + %a0 = call i64 asm sideeffect "", "={rax}"() nounwind + %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind + %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind + %a3 = call i64 asm sideeffect "", "={r8}"() nounwind + %a4 = call i64 asm sideeffect "", "={r9}"() nounwind + %a5 = call i64 asm sideeffect "", "={r10}"() nounwind + %a6 = call i64 asm sideeffect "", "={r11}"() nounwind + %a10 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind + %a11 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind + %a12 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind + %a13 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind + %a14 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind + %a15 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind + %a16 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind + %a17 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind + %a18 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind + %a19 = call <2 
x double> asm sideeffect "", "={xmm11}"() nounwind + %a20 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind + %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind + %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind + %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind + call preserve_allcc void @bar(i64 1, i64 2, double 3.0, double 4.0) + call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23) + ret void +} diff --git a/test/CodeGen/X86/preserve_mostcc64.ll b/test/CodeGen/X86/preserve_mostcc64.ll new file mode 100644 index 00000000000..4ee293e1430 --- /dev/null +++ b/test/CodeGen/X86/preserve_mostcc64.ll @@ -0,0 +1,86 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefix=SSE %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s + +; Every GPR should be saved - except r11 +define preserve_mostcc void @preserve_mostcc1() nounwind { +entry: +;SSE-LABEL: preserve_mostcc1 +;SSE: pushq %r10 +;SSE-NEXT: pushq %r9 +;SSE-NEXT: pushq %r8 +;SSE-NEXT: pushq %rdi +;SSE-NEXT: pushq %rsi +;SSE-NEXT: pushq %rdx +;SSE-NEXT: pushq %rcx +;SSE-NEXT: pushq %rax +;SSE-NEXT: pushq %rbp +;SSE-NEXT: pushq %r15 +;SSE-NEXT: pushq %r14 +;SSE-NEXT: pushq %r13 +;SSE-NEXT: pushq %r12 +;SSE-NEXT: pushq %rbx +;AVX-LABEL: preserve_mostcc1 +;AVX: pushq %r10 +;AVX-NEXT: pushq %r9 +;AVX-NEXT: pushq %r8 +;AVX-NEXT: pushq %rdi +;AVX-NEXT: pushq %rsi +;AVX-NEXT: pushq %rdx +;AVX-NEXT: pushq %rcx +;AVX-NEXT: pushq %rax +;AVX-NEXT: 
pushq %rbp +;AVX-NEXT: pushq %r15 +;AVX-NEXT: pushq %r14 +;AVX-NEXT: pushq %r13 +;AVX-NEXT: pushq %r12 +;AVX-NEXT: pushq %rbx + call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() + ret void +} + +; Make sure R11 and XMMs are saved before the call +declare preserve_mostcc void @foo(i64, i64, double, double) +define void @preserve_mostcc2() nounwind { +entry: +;SSE-LABEL: preserve_mostcc2 +;SSE: movq %r11, [[REG:%[a-z0-9]+]] +;SSE: movaps %xmm2 +;SSE: movaps %xmm3 +;SSE: movaps %xmm4 +;SSE: movaps %xmm5 +;SSE: movaps %xmm6 +;SSE: movaps %xmm7 +;SSE: movaps %xmm8 +;SSE: movaps %xmm9 +;SSE: movaps %xmm10 +;SSE: movaps %xmm11 +;SSE: movaps %xmm12 +;SSE: movaps %xmm13 +;SSE: movaps %xmm14 +;SSE: movaps %xmm15 +;SSE: movq [[REG]], %r11 + %a0 = call i64 asm sideeffect "", "={rax}"() nounwind + %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind + %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind + %a3 = call i64 asm sideeffect "", "={r8}"() nounwind + %a4 = call i64 asm sideeffect "", "={r9}"() nounwind + %a5 = call i64 asm sideeffect "", "={r10}"() nounwind + %a6 = call i64 asm sideeffect "", "={r11}"() nounwind + %a10 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind + %a11 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind + %a12 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind + %a13 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind + %a14 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind + %a15 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind + %a16 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind + %a17 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind + %a18 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind + %a19 = call <2 x double> asm sideeffect "", 
"={xmm11}"() nounwind + %a20 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind + %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind + %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind + %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind + call preserve_mostcc void @foo(i64 1, i64 2, double 3.0, double 4.0) + call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23) + ret void +}