mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 00:32:55 +00:00
AVX-512: Added more intrinsics for convert and min/max.
Removed vzeroupper from AVX-512 mode - our optimization guide does not recommend inserting vzeroupper at all. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198557 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5c1c0bd714
commit
3a880de6e6
@ -2747,7 +2747,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_cvtdq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_cvtudq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_cvtudq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector load with broadcast
|
||||
@ -2800,18 +2806,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
|
||||
// Arithmetic ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
|
||||
llvm_v16f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
|
||||
llvm_v8f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
|
||||
llvm_v16f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
|
||||
llvm_v8f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud512">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
|
@ -11462,14 +11462,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
case Intrinsic::x86_sse2_max_pd:
|
||||
case Intrinsic::x86_avx_max_ps_256:
|
||||
case Intrinsic::x86_avx_max_pd_256:
|
||||
case Intrinsic::x86_avx512_max_ps_512:
|
||||
case Intrinsic::x86_avx512_max_pd_512:
|
||||
case Intrinsic::x86_sse_min_ps:
|
||||
case Intrinsic::x86_sse2_min_pd:
|
||||
case Intrinsic::x86_avx_min_ps_256:
|
||||
case Intrinsic::x86_avx_min_pd_256:
|
||||
case Intrinsic::x86_avx512_min_ps_512:
|
||||
case Intrinsic::x86_avx512_min_pd_512: {
|
||||
case Intrinsic::x86_avx_min_pd_256: {
|
||||
unsigned Opcode;
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||
@ -11477,16 +11473,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
case Intrinsic::x86_sse2_max_pd:
|
||||
case Intrinsic::x86_avx_max_ps_256:
|
||||
case Intrinsic::x86_avx_max_pd_256:
|
||||
case Intrinsic::x86_avx512_max_ps_512:
|
||||
case Intrinsic::x86_avx512_max_pd_512:
|
||||
Opcode = X86ISD::FMAX;
|
||||
break;
|
||||
case Intrinsic::x86_sse_min_ps:
|
||||
case Intrinsic::x86_sse2_min_pd:
|
||||
case Intrinsic::x86_avx_min_ps_256:
|
||||
case Intrinsic::x86_avx_min_pd_256:
|
||||
case Intrinsic::x86_avx512_min_ps_512:
|
||||
case Intrinsic::x86_avx512_min_pd_512:
|
||||
Opcode = X86ISD::FMIN;
|
||||
break;
|
||||
}
|
||||
|
@ -2038,6 +2038,25 @@ defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem,
|
||||
SSE_ALU_ITINS_P.d, 0>,
|
||||
EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
|
||||
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
|
||||
(i16 -1), FROUND_CURRENT)),
|
||||
(VMAXPSZrr VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
|
||||
(v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
|
||||
(i8 -1), FROUND_CURRENT)),
|
||||
(VMAXPDZrr VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
|
||||
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
|
||||
(i16 -1), FROUND_CURRENT)),
|
||||
(VMINPSZrr VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
|
||||
(v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
|
||||
(i8 -1), FROUND_CURRENT)),
|
||||
(VMINPDZrr VR512:$src1, VR512:$src2)>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 VPTESTM instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2731,7 +2750,7 @@ def : Pat<(extloadf32 addr:$src),
|
||||
def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
|
||||
Requires<[HasAVX512]>;
|
||||
|
||||
multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
|
||||
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
|
||||
RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
|
||||
X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
|
||||
Domain d> {
|
||||
@ -2751,7 +2770,7 @@ let neverHasSideEffects = 1 in {
|
||||
} // neverHasSideEffects = 1
|
||||
}
|
||||
|
||||
multiclass avx512_vcvtt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
|
||||
multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
|
||||
RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
|
||||
X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
|
||||
Domain d> {
|
||||
@ -2768,8 +2787,7 @@ let neverHasSideEffects = 1 in {
|
||||
} // neverHasSideEffects = 1
|
||||
}
|
||||
|
||||
|
||||
defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
|
||||
defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
|
||||
memopv8f64, f512mem, v8f32, v8f64,
|
||||
SSEPackedSingle>, EVEX_V512, VEX_W, OpSize,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
@ -2784,7 +2802,7 @@ def : Pat<(v8f64 (extloadv8f32 addr:$src)),
|
||||
// AVX-512 Vector convert from sign integer to float/double
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
|
||||
defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
|
||||
memopv8i64, i512mem, v16f32, v16i32,
|
||||
SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
@ -2793,17 +2811,17 @@ defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
|
||||
SSEPackedDouble>, EVEX_V512, XS,
|
||||
EVEX_CD8<32, CD8VH>;
|
||||
|
||||
defm VCVTTPS2DQZ : avx512_vcvtt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
|
||||
defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
|
||||
memopv16f32, f512mem, v16i32, v16f32,
|
||||
SSEPackedSingle>, EVEX_V512, XS,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
|
||||
defm VCVTTPD2DQZ : avx512_vcvtt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
|
||||
defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
|
||||
memopv8f64, f512mem, v8i32, v8f64,
|
||||
SSEPackedDouble>, EVEX_V512, OpSize, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
|
||||
defm VCVTTPS2UDQZ : avx512_vcvtt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
|
||||
defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
|
||||
memopv16f32, f512mem, v16i32, v16f32,
|
||||
SSEPackedSingle>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
@ -2813,7 +2831,7 @@ def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
|
||||
(v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
|
||||
(VCVTTPS2UDQZrr VR512:$src)>;
|
||||
|
||||
defm VCVTTPD2UDQZ : avx512_vcvtt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
|
||||
defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
|
||||
memopv8f64, f512mem, v8i32, v8f64,
|
||||
SSEPackedDouble>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
@ -2828,7 +2846,7 @@ defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
|
||||
SSEPackedDouble>, EVEX_V512, XS,
|
||||
EVEX_CD8<32, CD8VH>;
|
||||
|
||||
defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
|
||||
defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
|
||||
memopv16i32, f512mem, v16f32, v16i32,
|
||||
SSEPackedSingle>, EVEX_V512, XD,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
@ -2839,9 +2857,17 @@ def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
|
||||
|
||||
|
||||
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
|
||||
(v16f32 immAllZerosV), (i16 -1), imm:$rc)),
|
||||
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
|
||||
(VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
|
||||
|
||||
def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
|
||||
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
|
||||
(VCVTDQ2PDZrr VR256X:$src)>;
|
||||
def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
|
||||
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
|
||||
(VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
|
||||
def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
|
||||
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
|
||||
(VCVTUDQ2PDZrr VR256X:$src)>;
|
||||
|
||||
multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
|
||||
RegisterClass DstRC, PatFrag mem_frag,
|
||||
|
@ -17,6 +17,7 @@
|
||||
#define DEBUG_TYPE "x86-vzeroupper"
|
||||
#include "X86.h"
|
||||
#include "X86InstrInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
@ -105,28 +106,20 @@ FunctionPass *llvm::createX86IssueVZeroUpperPass() {
|
||||
}
|
||||
|
||||
static bool isYmmReg(unsigned Reg) {
|
||||
return (Reg >= X86::YMM0 && Reg <= X86::YMM31);
|
||||
}
|
||||
|
||||
static bool isZmmReg(unsigned Reg) {
|
||||
return (Reg >= X86::ZMM0 && Reg <= X86::ZMM31);
|
||||
return (Reg >= X86::YMM0 && Reg <= X86::YMM15);
|
||||
}
|
||||
|
||||
static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
|
||||
for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
|
||||
E = MRI.livein_end(); I != E; ++I)
|
||||
if (isYmmReg(I->first) || isZmmReg(I->first))
|
||||
if (isYmmReg(I->first))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool clobbersAllYmmRegs(const MachineOperand &MO) {
|
||||
for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
|
||||
if (!MO.clobbersPhysReg(reg))
|
||||
return false;
|
||||
}
|
||||
for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
|
||||
for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
|
||||
if (!MO.clobbersPhysReg(reg))
|
||||
return false;
|
||||
}
|
||||
@ -155,11 +148,7 @@ static bool clobbersAnyYmmReg(MachineInstr *MI) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
if (!MO.isRegMask())
|
||||
continue;
|
||||
for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
|
||||
if (MO.clobbersPhysReg(reg))
|
||||
return true;
|
||||
}
|
||||
for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
|
||||
for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
|
||||
if (MO.clobbersPhysReg(reg))
|
||||
return true;
|
||||
}
|
||||
@ -170,6 +159,8 @@ static bool clobbersAnyYmmReg(MachineInstr *MI) {
|
||||
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
|
||||
/// vzero upper instructions before function calls.
|
||||
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (MF.getTarget().getSubtarget<X86Subtarget>().hasAVX512())
|
||||
return false;
|
||||
TII = MF.getTarget().getInstrInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
bool EverMadeChange = false;
|
||||
|
@ -447,4 +447,70 @@ declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i1> %a0, <8 x i64> %a1,
|
||||
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
|
||||
ret i8 %res
|
||||
}
|
||||
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
|
||||
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
|
||||
|
||||
; cvt intrinsics
|
||||
define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
|
||||
;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float>%res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
|
||||
;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float>%res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
|
||||
|
||||
define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
|
||||
;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
|
||||
ret <8 x double>%res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
|
||||
|
||||
define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
|
||||
;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
|
||||
ret <8 x double>%res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
|
||||
|
||||
; fp min - max
|
||||
define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK: vmaxps
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>,
|
||||
<16 x float>, i16, i32)
|
||||
|
||||
define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
|
||||
; CHECK: vmaxpd
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
<8 x double>zeroinitializer, i8 -1, i32 4)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
|
||||
<8 x double>, i8, i32)
|
||||
|
||||
define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK: vminps
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>,
|
||||
<16 x float>, i16, i32)
|
||||
|
||||
define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
|
||||
; CHECK: vminpd
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
<8 x double>zeroinitializer, i8 -1, i32 4)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
|
||||
<8 x double>, i8, i32)
|
||||
|
Loading…
x
Reference in New Issue
Block a user