mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-29 10:32:47 +00:00
AVX-512: intrinsics for VPADD, VPMULDQ and VPSUB
by Asaf Badouh (asaf.badouh@intel.com) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233906 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
91a9b15130
commit
4eb165220f
@ -1408,12 +1408,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector min, max
|
||||
@ -3242,6 +3236,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Integer arithmetic ops
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_padd_d_512 : GCCBuiltin<"__builtin_ia32_paddd512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_q_512 : GCCBuiltin<"__builtin_ia32_paddq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_d_512 : GCCBuiltin<"__builtin_ia32_psubd512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_q_512 : GCCBuiltin<"__builtin_ia32_psubq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
// Gather and Scatter ops
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
|
||||
|
@ -2971,60 +2971,36 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
|
||||
itins, HasBWI, IsCommutable>;
|
||||
}
|
||||
|
||||
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
|
||||
ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
|
||||
PatFrag memop_frag, X86MemOperand x86memop,
|
||||
PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
|
||||
string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
|
||||
let isCommutable = IsCommutable in
|
||||
{
|
||||
def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>, EVEX_4V;
|
||||
def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
|
||||
[], itins.rr>, EVEX_4V, EVEX_K;
|
||||
def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" ,
|
||||
"|$dst {${mask}} {z}, $src1, $src2}"),
|
||||
[], itins.rr>, EVEX_4V, EVEX_KZ;
|
||||
}
|
||||
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
|
||||
SDNode OpNode,X86VectorVTInfo _Src,
|
||||
X86VectorVTInfo _Dst, bit IsCommutable = 0> {
|
||||
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
|
||||
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1","$src1, $src2",
|
||||
(_Dst.VT (OpNode
|
||||
(_Src.VT _Src.RC:$src1),
|
||||
(_Src.VT _Src.RC:$src2))),
|
||||
"",itins.rr, IsCommutable>,
|
||||
AVX512BIBase, EVEX_4V;
|
||||
let mayLoad = 1 in {
|
||||
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>, EVEX_4V;
|
||||
def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
|
||||
[], itins.rm>, EVEX_4V, EVEX_K;
|
||||
def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
|
||||
[], itins.rm>, EVEX_4V, EVEX_KZ;
|
||||
def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86scalar_mop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
|
||||
", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
|
||||
[], itins.rm>, EVEX_4V, EVEX_B;
|
||||
def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
|
||||
", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
|
||||
BrdcstStr, "}"),
|
||||
[], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
|
||||
def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
|
||||
", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
|
||||
BrdcstStr, "}"),
|
||||
[], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
|
||||
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
|
||||
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
|
||||
(bitconvert (_Src.LdFrag addr:$src2)))),
|
||||
"", itins.rm>,
|
||||
AVX512BIBase, EVEX_4V;
|
||||
|
||||
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
|
||||
(ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
|
||||
OpcodeStr,
|
||||
"${src2}"##_Dst.BroadcastStr##", $src1",
|
||||
"$src1, ${src2}"##_Dst.BroadcastStr,
|
||||
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32
|
||||
(_Dst.VT (X86VBroadcast
|
||||
(_Dst.ScalarLdFrag addr:$src2)))))),
|
||||
"", itins.rm>,
|
||||
AVX512BIBase, EVEX_4V, EVEX_B;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3039,24 +3015,13 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
|
||||
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
|
||||
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
|
||||
|
||||
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
|
||||
loadv8i64, i512mem, loadi64, i64mem, "{1to8}",
|
||||
SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
|
||||
X86pmuldq, v16i32_info, v8i64_info, 1>,
|
||||
T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
|
||||
defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
|
||||
loadv8i64, i512mem, loadi64, i64mem, "{1to8}",
|
||||
SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
|
||||
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
|
||||
(VPMULUDQZrr VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
|
||||
(v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
|
||||
(VPMULUDQZrr VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
|
||||
(v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
|
||||
(VPMULDQZrr VR512:$src1, VR512:$src2)>;
|
||||
defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
|
||||
X86pmuludq, v16i32_info, v8i64_info, 1>,
|
||||
EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
|
||||
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
|
||||
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
|
||||
|
@ -334,6 +334,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
|
||||
X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
@ -360,6 +362,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::PMULDQ, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::PMULUDQ, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
|
||||
@ -380,6 +386,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
|
||||
|
@ -515,14 +515,6 @@ define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
}
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1,
|
||||
<8 x i64>zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
|
||||
|
||||
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
|
||||
; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
|
||||
%res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
|
||||
@ -1703,3 +1695,471 @@ define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %pass
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rr
|
||||
;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rrk
|
||||
;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rrkz
|
||||
;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rm
|
||||
;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rmk
|
||||
;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rmkz
|
||||
;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rmb
|
||||
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rmbk
|
||||
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi32_rmbkz
|
||||
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rr
|
||||
;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rrk
|
||||
;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rrkz
|
||||
;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rm
|
||||
;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rmk
|
||||
;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rmkz
|
||||
;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rmb
|
||||
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rmbk
|
||||
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi32_rmbkz
|
||||
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rr
|
||||
;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rrk
|
||||
;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rrkz
|
||||
;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rm
|
||||
;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rmk
|
||||
;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rmkz
|
||||
;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rmb
|
||||
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rmbk
|
||||
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_add_epi64_rmbkz
|
||||
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rr
|
||||
;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rrk
|
||||
;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rrkz
|
||||
;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rm
|
||||
;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rmk
|
||||
;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rmkz
|
||||
;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rmb
|
||||
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rmbk
|
||||
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_sub_epi64_rmbkz
|
||||
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
;CHECK-LABEL: test_mask_mul_epi32_rr
|
||||
;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_mul_epi32_rrk
|
||||
;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_mul_epi32_rrkz
|
||||
;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_mul_epi32_rm
|
||||
;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_mul_epi32_rmk
|
||||
;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
; Signed multiply, reg/mem with zero-masking ({z}).
define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmkz
  ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

; Signed multiply with embedded broadcast: a scalar i64 load splatted
; across all 8 lanes (insertelement + zero shufflevector) should fold
; into the {1to8} broadcast memory form.
define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmb
  ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

; Signed multiply, embedded broadcast ({1to8}) with merge-masking.
define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmbk
  ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

; Signed multiply, embedded broadcast ({1to8}) with zero-masking ({z}).
define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
  ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

; Target intrinsic: masked 512-bit signed 32x32->64 multiply (vpmuldq).
declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

; Unsigned 32x32->64 multiply (vpmuludq), reg/reg, no masking
; (all-ones mask i8 -1).
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
  ;CHECK-LABEL: test_mask_mul_epu32_rr
  ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

; Unsigned multiply, reg/reg, merge-masking into %passThru.
define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rrk
  ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

; Unsigned multiply, reg/reg, zero-masking ({z}).
define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rrkz
  ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

; Unsigned multiply, reg/mem: the load of %b should fold into the
; vpmuludq memory operand (no mask).
define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
  ;CHECK-LABEL: test_mask_mul_epu32_rm
  ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

; Unsigned multiply, reg/mem with merge-masking into %passThru.
define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmk
  ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

; Unsigned multiply, reg/mem with zero-masking ({z}).
define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmkz
  ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

; Unsigned multiply with embedded broadcast: a scalar i64 load splatted
; across all 8 lanes should fold into the {1to8} broadcast memory form.
define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmb
  ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

; Unsigned multiply, embedded broadcast ({1to8}) with merge-masking.
define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmbk
  ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

; Unsigned multiply, embedded broadcast ({1to8}) with zero-masking ({z}).
define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
  ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

; Target intrinsic: masked 512-bit unsigned 32x32->64 multiply (vpmuludq).
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
Loading…
Reference in New Issue
Block a user