Special calling conventions for Intel OpenCL built-in library.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166566 91177308-0d34-0410-b5e6-96231b3b80d8
Commit 3575222175 (parent 0ed9b8e0bf)
@@ -112,7 +112,11 @@ namespace CallingConv {
     /// Cannot have variable arguments.
     /// Can also be called by the host.
     /// Is externally visible.
-    SPIR_KERNEL = 76
+    SPIR_KERNEL = 76,
+
+    /// Intel_OCL_BI - Calling conventions for Intel OpenCL built-ins
+    Intel_OCL_BI = 77
+
   };
 } // End CallingConv namespace

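In textual IR the new convention is spelled intel_ocl_bicc (added to the lexer, parser, and printer below). A minimal usage sketch; the function names here are hypothetical and not part of this commit:

; Caller and callee must agree on the convention, as with any
; non-default calling convention in LLVM IR.
declare intel_ocl_bicc <16 x float> @__ocl_builtin(<16 x float>)

define <16 x float> @wrap(<16 x float> %x) nounwind {
  %r = call intel_ocl_bicc <16 x float> @__ocl_builtin(<16 x float> %x)
  ret <16 x float> %r
}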
@@ -527,6 +527,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(ptx_device);
   KEYWORD(spir_kernel);
   KEYWORD(spir_func);
+  KEYWORD(intel_ocl_bicc);

   KEYWORD(cc);
   KEYWORD(c);
@@ -1094,6 +1094,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
 ///   ::= /*empty*/
 ///   ::= 'ccc'
 ///   ::= 'fastcc'
+///   ::= 'kw_intel_ocl_bicc'
 ///   ::= 'coldcc'
 ///   ::= 'x86_stdcallcc'
 ///   ::= 'x86_fastcallcc'
@@ -1125,6 +1126,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
   case lltok::kw_ptx_device:     CC = CallingConv::PTX_Device; break;
   case lltok::kw_spir_kernel:    CC = CallingConv::SPIR_KERNEL; break;
   case lltok::kw_spir_func:      CC = CallingConv::SPIR_FUNC; break;
+  case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
   case lltok::kw_cc: {
     unsigned ArbitraryCC;
     Lex.Lex();
@@ -77,6 +77,7 @@ namespace lltok {
     kw_c,

     kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
+    kw_intel_ocl_bicc,
     kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
     kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
     kw_msp430_intrcc,
@@ -88,6 +88,21 @@ def RetCC_X86_32_Fast : CallingConv<[
   CCDelegateTo<RetCC_X86Common>
 ]>;

+// Intel_OCL_BI return-value convention.
+def RetCC_Intel_OCL_BI : CallingConv<[
+  // Vector types are returned in XMM0, XMM1, XMM2 and XMM3.
+  CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+            CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+  // 256-bit FP vectors.
+  // No more than 4 registers.
+  CCIfType<[v8f32, v4f64, v8i32, v4i64],
+            CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+  // i32 and i64 are returned in the standard way.
+  CCDelegateTo<RetCC_X86Common>
+]>;
+
 // X86-64 C return-value convention.
 def RetCC_X86_64_C : CallingConv<[
   // The X86-64 calling convention always returns FP values in XMM0.
@@ -128,6 +143,10 @@ def RetCC_X86_64 : CallingConv<[

 // This is the return-value convention used for the entire X86 backend.
 def RetCC_X86 : CallingConv<[
+
+  // Check if this is the Intel OpenCL built-ins calling convention
+  CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
+
   CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
   CCDelegateTo<RetCC_X86_32>
 ]>;
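Per RetCC_Intel_OCL_BI above, scalar FP and 128-bit vector results come back in XMM0-XMM3 and 256-bit vectors in YMM0-YMM3; a <16 x float> result therefore spans two YMM registers, which is what the ymm0/ymm1 checks in the new tests below rely on. An illustrative sketch with a hypothetical function name:

; Per the v8f32 rule in RetCC_Intel_OCL_BI, this 256-bit result is
; expected in YMM0 when AVX is available.
declare intel_ocl_bicc <8 x float> @__ocl_bi_example(<8 x float>)

define <8 x float> @get_result(<8 x float> %v) nounwind {
  %r = call intel_ocl_bicc <8 x float> @__ocl_bi_example(<8 x float> %v)
  ret <8 x float> %r
}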
@@ -235,6 +254,29 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[f80], CCAssignToStack<0, 0>>
 ]>;

+// X86-64 Intel OpenCL built-ins calling convention.
+def CC_Intel_OCL_BI : CallingConv<[
+  CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>,
+
+  CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
+  CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8,  R9]>>>,
+
+  CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>,
+  CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>,
+
+  // The SSE vector arguments are passed in XMM registers.
+  CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+           CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+  // The 256-bit vector arguments are passed in YMM registers.
+  CCIfType<[v8f32, v4f64, v8i32, v4i64],
+           CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
+
+  CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+  CCDelegateTo<CC_X86_64_C>
+]>;
+
+
 def CC_X86_64_GHC : CallingConv<[
   // Promote i8/i16/i32 arguments to i64.
   CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -408,6 +450,7 @@ def CC_X86_64 : CallingConv<[

 // This is the argument convention used for the entire X86 backend.
 def CC_X86 : CallingConv<[
+  CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
   CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
   CCDelegateTo<CC_X86_32>
 ]>;
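For the argument side, a sketch of what CC_Intel_OCL_BI implies (function names hypothetical, not part of the commit): on 64-bit non-Windows targets, i64 arguments land in RDI/RSI/RDX/RCX and 128-bit vectors in XMM0-XMM3, while on Win64 the integer arguments use RCX/RDX/R8/R9 instead, per the isTargetWin64 rules above.

declare intel_ocl_bicc <4 x float> @__ocl_bi_helper(i64, <4 x float>)

define <4 x float> @call_helper(i64 %n, <4 x float> %v) nounwind {
  ; Under CC_Intel_OCL_BI, %n is expected in RDI (RCX on Win64) and
  ; %v in XMM0 at this call site.
  %r = call intel_ocl_bicc <4 x float> @__ocl_bi_helper(i64 %n, <4 x float> %v)
  ret <4 x float> %r
}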
@@ -426,3 +469,17 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;

 def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
                                      (sequence "XMM%u", 6, 15))>;
+
+
+// Standard C + YMM6-15
+def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
+                                                  R13, R14, R15,
+                                                  (sequence "YMM%u", 6, 15))>;
+
+// Standard C + XMM8-15
+def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
+                                           (sequence "XMM%u", 8, 15))>;
+
+// Standard C + YMM8-15
+def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
+                                               (sequence "YMM%u", 8, 15))>;
@@ -229,15 +229,26 @@ const uint16_t *
 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   bool callsEHReturn = false;
   bool ghcCall = false;
+  bool oclBiCall = false;
+  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();

   if (MF) {
     callsEHReturn = MF->getMMI().callsEHReturn();
     const Function *F = MF->getFunction();
     ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+    oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
   }

   if (ghcCall)
     return CSR_NoRegs_SaveList;
+  if (oclBiCall) {
+    if (HasAVX && IsWin64)
+      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+    if (HasAVX && Is64Bit)
+      return CSR_64_Intel_OCL_BI_AVX_SaveList;
+    if (!HasAVX && !IsWin64 && Is64Bit)
+      return CSR_64_Intel_OCL_BI_SaveList;
+  }
   if (Is64Bit) {
     if (IsWin64)
       return CSR_Win64_SaveList;
@@ -252,6 +263,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {

 const uint32_t*
 X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+
+  if (CC == CallingConv::Intel_OCL_BI) {
+    if (IsWin64 && HasAVX)
+      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
+    if (Is64Bit && HasAVX)
+      return CSR_64_Intel_OCL_BI_AVX_RegMask;
+    if (!HasAVX && !IsWin64 && Is64Bit)
+      return CSR_64_Intel_OCL_BI_RegMask;
+  }
   if (CC == CallingConv::GHC)
     return CSR_NoRegs_RegMask;
   if (!Is64Bit)
@@ -74,6 +74,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out)
   case CallingConv::X86_StdCall:  Out << "x86_stdcallcc"; break;
   case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
   case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+  case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
   case CallingConv::ARM_APCS:     Out << "arm_apcscc"; break;
   case CallingConv::ARM_AAPCS:    Out << "arm_aapcscc"; break;
   case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc"; break;
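Together with the lexer and parser hunks above, this printer case lets the convention round-trip through textual IR: assembling and then disassembling a function such as the sketch below should preserve the intel_ocl_bicc keyword.

; Round-trips through llvm-as / llvm-dis with the keyword intact.
define intel_ocl_bicc void @roundtrip() nounwind {
  ret void
}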
@@ -705,6 +705,7 @@ void Verifier::visitFunction(Function &F) {
   case CallingConv::Cold:
   case CallingConv::X86_FastCall:
   case CallingConv::X86_ThisCall:
+  case CallingConv::Intel_OCL_BI:
   case CallingConv::PTX_Kernel:
   case CallingConv::PTX_Device:
     Assert1(!F.isVarArg(),
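The verifier now treats Intel_OCL_BI like the other listed conventions: functions using it may not be variadic, matching the "Cannot have variable arguments." note in the calling-convention enum hunk above. A sketch of what is and is not accepted (names hypothetical):

; Accepted: fixed-arity function with the new convention.
define intel_ocl_bicc void @ok(i32 %x) nounwind {
  ret void
}
; A variadic variant, e.g.
;   define intel_ocl_bicc void @bad(i32, ...) { ... }
; would now be flagged by the verifier.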
test/CodeGen/X86/avx-intel-ocl.ll (new file, 107 lines)
@@ -0,0 +1,107 @@
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx | FileCheck -check-prefix=WIN32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx | FileCheck -check-prefix=WIN64 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s

declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
; WIN64: testf16_inp
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: leaq {{.*}}(%rsp), %rcx
; WIN64: call
; WIN64: ret

; WIN32: testf16_inp
; WIN32: movl %eax, (%esp)
; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN32: call
; WIN32: ret

; NOT_WIN: testf16_inp
; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
; NOT_WIN: leaq {{.*}}(%rsp), %rdi
; NOT_WIN: call
; NOT_WIN: ret

; test calling conventions - input parameters
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
  %y = alloca <16 x float>, align 16
  %x = fadd <16 x float> %a, %b
  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
  %2 = load <16 x float>* %y, align 16
  %3 = fadd <16 x float> %2, %1
  ret <16 x float> %3
}
; test calling conventions - preserved registers

; preserved ymm6-ymm15
; WIN64: testf16_regs
; WIN64: call
; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0
; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1
; WIN64: ret

; preserved ymm8-ymm15
; NOT_WIN: testf16_regs
; NOT_WIN: call
; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
; NOT_WIN: ret

define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
  %y = alloca <16 x float>, align 16
  %x = fadd <16 x float> %a, %b
  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
  %2 = load <16 x float>* %y, align 16
  %3 = fadd <16 x float> %1, %b
  %4 = fadd <16 x float> %2, %3
  ret <16 x float> %4
}
; test calling conventions - prolog and epilog
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: call
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload

; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
; NOT_WIN: call
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
  %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
  ret <16 x float> %c
}
test/CodeGen/X86/sse-intel-ocl.ll (new file, 93 lines)
@@ -0,0 +1,93 @@
; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN64 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck -check-prefix=NOT_WIN %s

declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
; WIN64: testf16_inp
; WIN64: addps {{.*}}, {{%xmm[0-3]}}
; WIN64: addps {{.*}}, {{%xmm[0-3]}}
; WIN64: addps {{.*}}, {{%xmm[0-3]}}
; WIN64: addps {{.*}}, {{%xmm[0-3]}}
; WIN64: leaq {{.*}}(%rsp), %rcx
; WIN64: call
; WIN64: ret

; WIN32: testf16_inp
; WIN32: movl %eax, (%esp)
; WIN32: addps {{.*}}, {{%xmm[0-3]}}
; WIN32: addps {{.*}}, {{%xmm[0-3]}}
; WIN32: addps {{.*}}, {{%xmm[0-3]}}
; WIN32: addps {{.*}}, {{%xmm[0-3]}}
; WIN32: call
; WIN32: ret

; NOT_WIN: testf16_inp
; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
; NOT_WIN: leaq {{.*}}(%rsp), %rdi
; NOT_WIN: call
; NOT_WIN: ret

; test calling conventions - input parameters
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
  %y = alloca <16 x float>, align 16
  %x = fadd <16 x float> %a, %b
  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
  %2 = load <16 x float>* %y, align 16
  %3 = fadd <16 x float> %2, %1
  ret <16 x float> %3
}
; test calling conventions - preserved registers

; preserved xmm6-xmm15
; WIN64: testf16_regs
; WIN64: call
; WIN64: addps {{%xmm[6-9]}}, {{.*}}
; WIN64: addps {{%xmm[6-9]}}, {{.*}}
; WIN64: ret

; preserved xmm8-xmm15
; NOT_WIN: testf16_regs
; NOT_WIN: call
; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
; NOT_WIN: ret

define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
  %y = alloca <16 x float>, align 16
  %x = fadd <16 x float> %a, %b
  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
  %2 = load <16 x float>* %y, align 16
  %3 = fadd <16 x float> %1, %b
  %4 = fadd <16 x float> %2, %3
  ret <16 x float> %4
}
; test calling conventions - prolog and epilog
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
; NOT_WIN: call
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
  %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
  ret <16 x float> %c
}