mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
[AVX512] Add 512b integer shift by variable intrinsics and patterns.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222786 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fb73fbf793
commit
9f4bb0420d
@ -1603,6 +1603,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx512_mask_psrai_q : GCCBuiltin<"__builtin_ia32_psraqi512">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_q : GCCBuiltin<"__builtin_ia32_psllq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_d : GCCBuiltin<"__builtin_ia32_psrld512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_d : GCCBuiltin<"__builtin_ia32_psrad512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Pack ops.
|
||||
|
@ -16872,7 +16872,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
RoundingMode),
|
||||
Mask, Src0, Subtarget, DAG);
|
||||
}
|
||||
|
||||
case INTR_TYPE_2OP_MASK: {
|
||||
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
|
||||
Op.getOperand(2)),
|
||||
Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
|
||||
}
|
||||
case CMP_MASK:
|
||||
case CMP_MASK_CC: {
|
||||
// Comparison intrinsics with masks.
|
||||
@ -16924,7 +16928,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
case VSHIFT_MASK:
|
||||
return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), DAG),
|
||||
Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);;
|
||||
Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -3196,6 +3196,7 @@ def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
|
||||
def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
|
||||
(v8i64 VR512:$src2), (i8 -1))),
|
||||
(COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 Shift instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3214,73 +3215,57 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
|
||||
}
|
||||
|
||||
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
RegisterClass RC, ValueType vt, ValueType SrcVT,
|
||||
PatFrag bc_frag, RegisterClass KRC> {
|
||||
// src2 is always 128-bit
|
||||
def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, VR128X:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
|
||||
SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
|
||||
def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, VR128X:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
|
||||
[], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
|
||||
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (vt (OpNode RC:$src1,
|
||||
(bc_frag (memopv2i64 addr:$src2)))))],
|
||||
SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
|
||||
def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
|
||||
[], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
|
||||
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
|
||||
// src2 is always 128-bit
|
||||
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
|
||||
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (bc_frag (memopv2i64 addr:$src2)))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
|
||||
}
|
||||
|
||||
multiclass avx512_varshift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
|
||||
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>, EVEX_V512;
|
||||
}
|
||||
|
||||
multiclass avx512_varshift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
|
||||
SDNode OpNode> {
|
||||
defm D : avx512_varshift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
|
||||
v16i32_info>, EVEX_CD8<32, CD8VQ>;
|
||||
defm Q : avx512_varshift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
|
||||
v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
|
||||
}
|
||||
|
||||
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
|
||||
v16i32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
|
||||
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VQ>;
|
||||
|
||||
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
|
||||
v8i64_info>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
|
||||
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VQ>, VEX_W;
|
||||
|
||||
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
|
||||
v16i32_info>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
|
||||
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VQ>;
|
||||
|
||||
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
|
||||
v8i64_info>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
|
||||
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VQ>, VEX_W;
|
||||
|
||||
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
|
||||
v16i32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
|
||||
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VQ>;
|
||||
|
||||
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
|
||||
v8i64_info>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
|
||||
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VQ>, VEX_W;
|
||||
|
||||
defm VPSRL : avx512_varshift_types<0xD2, 0xD3, "vpsrl", X86vsrl>;
|
||||
defm VPSLL : avx512_varshift_types<0xF2, 0xF3, "vpsll", X86vshl>;
|
||||
defm VPSRA : avx512_varshift_types<0xE2, 0xE2, "vpsra", X86vsra>;
|
||||
|
||||
//===-------------------------------------------------------------------===//
|
||||
// Variable Bit Shifts
|
||||
|
@ -21,7 +21,7 @@ enum IntrinsicType {
|
||||
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
|
||||
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK_RM
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK
|
||||
};
|
||||
|
||||
struct IntrinsicData {
|
||||
@ -195,10 +195,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psra_q, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
|
||||
|
@ -1088,3 +1088,141 @@ define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
|
||||
|
||||
define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: test_x86_avx512_psll_d
|
||||
; CHECK: vpslld
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_mask_psll_d
|
||||
; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_maskz_psll_d
|
||||
; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
|
||||
|
||||
define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
|
||||
; CHECK-LABEL: test_x86_avx512_psll_q
|
||||
; CHECK: vpsllq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_mask_psll_q
|
||||
; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_maskz_psll_q
|
||||
; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
|
||||
|
||||
define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: test_x86_avx512_psrl_d
|
||||
; CHECK: vpsrld
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_mask_psrl_d
|
||||
; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
|
||||
; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
|
||||
|
||||
define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
|
||||
; CHECK-LABEL: test_x86_avx512_psrl_q
|
||||
; CHECK: vpsrlq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_mask_psrl_q
|
||||
; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
|
||||
; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
|
||||
|
||||
define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: test_x86_avx512_psra_d
|
||||
; CHECK: vpsrad
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_mask_psra_d
|
||||
; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_maskz_psra_d
|
||||
; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
|
||||
|
||||
define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
|
||||
; CHECK-LABEL: test_x86_avx512_psra_q
|
||||
; CHECK: vpsraq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_mask_psra_q
|
||||
; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_x86_avx512_maskz_psra_q
|
||||
; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
|
||||
|
Loading…
Reference in New Issue
Block a user