mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-22 13:29:44 +00:00
AVX-512: Added all forms of COMPRESS instruction
+ intrinsics + tests

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224019 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c3692e5c67
commit
11fb1d0eb5
@ -3458,6 +3458,108 @@ let TargetPrefix = "x86" in {
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Compress, Expand
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_compress_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compresssf512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compressdf512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compresssf256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compressdf256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compresssf128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compressdf128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_compress_store_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoresf512_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoredf512_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoresf256_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoredf256_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoresf128_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoredf128_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
|
||||
def int_x86_avx512_mask_compress_d_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_q_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compressdi512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_d_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compresssi256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_q_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compressdi256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_d_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compresssi128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_compress_q_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compressdi128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_compress_store_d_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoresi512_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty,
|
||||
llvm_i16_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_q_512 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoredi512_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_d_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoresi256_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_q_256 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoredi256_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_d_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoresi128_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
def int_x86_avx512_mask_compress_store_q_128 :
|
||||
GCCBuiltin<"__builtin_ia32_compressstoredi128_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
}
|
||||
// Misc.
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
|
||||
|
@ -16957,9 +16957,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), DAG);
|
||||
case VSHIFT_MASK:
|
||||
return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), DAG),
|
||||
Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
|
||||
return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl,
|
||||
Op.getSimpleValueType(),
|
||||
Op.getOperand(1),
|
||||
Op.getOperand(2), DAG),
|
||||
Op.getOperand(4), Op.getOperand(3), Subtarget,
|
||||
DAG);
|
||||
case COMPRESS_TO_REG: {
|
||||
SDValue Mask = Op.getOperand(3);
|
||||
SDValue DataToCompress = Op.getOperand(1);
|
||||
SDValue PassThru = Op.getOperand(2);
|
||||
if (isAllOnes(Mask)) // return data as is
|
||||
return Op.getOperand(1);
|
||||
EVT VT = Op.getValueType();
|
||||
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
VT.getVectorNumElements());
|
||||
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
Mask.getValueType().getSizeInBits());
|
||||
SDLoc dl(Op);
|
||||
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
|
||||
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
|
||||
DAG.getIntPtrConstant(0));
|
||||
|
||||
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
|
||||
PassThru);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -17477,6 +17499,31 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
|
||||
Results.push_back(Store);
|
||||
return DAG.getMergeValues(Results, dl);
|
||||
}
|
||||
case COMPRESS_TO_MEM: {
|
||||
SDLoc dl(Op);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
SDValue DataToCompress = Op.getOperand(3);
|
||||
SDValue Addr = Op.getOperand(2);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
|
||||
if (isAllOnes(Mask)) // return just a store
|
||||
return DAG.getStore(Chain, dl, DataToCompress, Addr,
|
||||
MachinePointerInfo(), false, false, 0);
|
||||
|
||||
EVT VT = DataToCompress.getValueType();
|
||||
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
VT.getVectorNumElements());
|
||||
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
Mask.getValueType().getSizeInBits());
|
||||
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
|
||||
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
|
||||
DAG.getIntPtrConstant(0));
|
||||
|
||||
SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
|
||||
DataToCompress, DAG.getUNDEF(VT));
|
||||
return DAG.getStore(Chain, dl, Compressed, Addr,
|
||||
MachinePointerInfo(), false, false, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -19662,6 +19709,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
|
||||
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
|
||||
case X86ISD::XTEST: return "X86ISD::XTEST";
|
||||
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -379,6 +379,10 @@ namespace llvm {
|
||||
FMADDSUB,
|
||||
FMSUBADD,
|
||||
|
||||
// Compress and expand
|
||||
COMPRESS,
|
||||
EXPAND,
|
||||
|
||||
// Save xmm argument registers to the stack, according to %al. An operator
|
||||
// is needed so that this can be expanded with control flow.
|
||||
VASTART_SAVE_XMM_REGS,
|
||||
|
@ -5281,3 +5281,51 @@ multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
|
||||
}
|
||||
|
||||
defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - COMPRESS and EXPAND
|
||||
//
|
||||
// Defines the three masked COMPRESS instruction forms for one vector type
// description `_` (an X86VectorVTInfo), all sharing opcode `opc` and the
// mnemonic `OpcodeStr`:
//   rrkz - register destination, zero-masking (EVEX_KZ)
//   rrk  - register destination, merge-masking into a tied source (EVEX_K)
//   mrk  - memory destination (EVEX_K), flagged mayStore
// Each form is selected from an X86compress node whose operands are
// (mask, data, pass-through); only the pass-through operand differs.
multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
|
||||
string OpcodeStr> {
|
||||
// Zero-masking register form: the pass-through operand is the all-zeros vector.
def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src),
|
||||
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
|
||||
[(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
|
||||
_.ImmAllZerosV)))]>, EVEX_KZ;
|
||||
|
||||
// Merge-masking register form: $src0 is the pass-through value and is tied to
// $dst by the constraint below.
let Constraints = "$src0 = $dst" in
|
||||
def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
|
||||
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
|
||||
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
|
||||
[(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
|
||||
_.RC:$src0)))]>, EVEX_K;
|
||||
|
||||
// Memory-destination form: matches a store of an X86compress result whose
// pass-through operand is undef. No register output is produced.
let mayStore = 1 in {
|
||||
def mrk : AVX5128I<opc, MRMDestMem, (outs),
|
||||
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
|
||||
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
|
||||
[(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)),
|
||||
addr:$dst)]>,
|
||||
// Compressed-displacement scaling is per element (CD8VT1 with _.EltSize).
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiates compress_by_vec_width for every vector width described by
// `VTInfo` (an AVX512VLVectorVTInfo), using the same opcode and mnemonic:
//   Z    - 512-bit variant (info512, EVEX_V512), defined unconditionally here
//   Z256 - 256-bit variant (info256, EVEX_V256), only under HasVLX
//   Z128 - 128-bit variant (info128, EVEX_V128), only under HasVLX
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
|
||||
AVX512VLVectorVTInfo VTInfo> {
|
||||
defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
|
||||
|
||||
// The 256-bit and 128-bit forms are guarded by the HasVLX predicate.
let Predicates = [HasVLX] in {
|
||||
defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
|
||||
defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
// Concrete COMPRESS instruction families, one per element type.
// The integer forms (vpcompressd/vpcompressq) use opcode 0x8B and the
// floating-point forms (vcompressps/vcompresspd) use opcode 0x8A; the
// 64-bit-element variants of each pair additionally carry VEX_W.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>,
|
||||
EVEX;
|
||||
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>,
|
||||
EVEX, VEX_W;
|
||||
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>,
|
||||
EVEX;
|
||||
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
|
||||
EVEX, VEX_W;
|
||||
|
||||
|
@ -283,6 +283,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
|
||||
def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
|
||||
def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
|
||||
|
||||
// SelectionDAG node for X86ISD::COMPRESS: one result and three operands.
// Type constraints (index 0 is the result, 1..3 are the operands in order):
//   - the result has the same type as operands 2 and 3,
//   - operand 3 is a vector,
//   - operand 1 is a vector with an integer element type.
// No node properties are attached (empty property list).
def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
|
||||
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
|
||||
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE Complex Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -21,7 +21,8 @@ enum IntrinsicType {
|
||||
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
|
||||
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
|
||||
COMPRESS_TO_REG, COMPRESS_TO_MEM
|
||||
};
|
||||
|
||||
struct IntrinsicData {
|
||||
@ -70,6 +71,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
|
||||
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_128,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_256,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_512,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_128,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_256,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_512,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_128,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_256,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_512,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_128,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_256,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
|
||||
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
|
||||
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
|
||||
@ -207,6 +233,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_TO_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
|
@ -67,7 +67,7 @@ define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK_LABEL: test_cmp_d_256
|
||||
; CHECK-LABEL: test_cmp_d_256
|
||||
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -96,7 +96,7 @@ define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_cmp_d_256
|
||||
; CHECK-LABEL: test_mask_cmp_d_256
|
||||
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -127,7 +127,7 @@ define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
|
||||
|
||||
define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK_LABEL: test_ucmp_d_256
|
||||
; CHECK-LABEL: test_ucmp_d_256
|
||||
; CHECK: vpcmpequd %ymm1, %ymm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -156,7 +156,7 @@ define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_ucmp_d_256
|
||||
; CHECK-LABEL: test_mask_ucmp_d_256
|
||||
; CHECK: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -187,7 +187,7 @@ define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
|
||||
|
||||
define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK_LABEL: test_cmp_q_256
|
||||
; CHECK-LABEL: test_cmp_q_256
|
||||
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -216,7 +216,7 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_cmp_q_256
|
||||
; CHECK-LABEL: test_mask_cmp_q_256
|
||||
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -247,7 +247,7 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
|
||||
|
||||
define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK_LABEL: test_ucmp_q_256
|
||||
; CHECK-LABEL: test_ucmp_q_256
|
||||
; CHECK: vpcmpequq %ymm1, %ymm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -276,7 +276,7 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_ucmp_q_256
|
||||
; CHECK-LABEL: test_mask_ucmp_q_256
|
||||
; CHECK: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -373,7 +373,7 @@ define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
|
||||
|
||||
define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK_LABEL: test_cmp_d_128
|
||||
; CHECK-LABEL: test_cmp_d_128
|
||||
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -402,7 +402,7 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_cmp_d_128
|
||||
; CHECK-LABEL: test_mask_cmp_d_128
|
||||
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -433,7 +433,7 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
|
||||
|
||||
define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK_LABEL: test_ucmp_d_128
|
||||
; CHECK-LABEL: test_ucmp_d_128
|
||||
; CHECK: vpcmpequd %xmm1, %xmm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -462,7 +462,7 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_ucmp_d_128
|
||||
; CHECK-LABEL: test_mask_ucmp_d_128
|
||||
; CHECK: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -493,7 +493,7 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
|
||||
|
||||
define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; CHECK_LABEL: test_cmp_q_128
|
||||
; CHECK-LABEL: test_cmp_q_128
|
||||
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -522,7 +522,7 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_cmp_q_128
|
||||
; CHECK-LABEL: test_mask_cmp_q_128
|
||||
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -553,7 +553,7 @@ define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
|
||||
|
||||
define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; CHECK_LABEL: test_ucmp_q_128
|
||||
; CHECK-LABEL: test_ucmp_q_128
|
||||
; CHECK: vpcmpequq %xmm1, %xmm0, %k0 ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -582,7 +582,7 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
; CHECK_LABEL: test_mask_ucmp_q_128
|
||||
; CHECK-LABEL: test_mask_ucmp_q_128
|
||||
; CHECK: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
|
||||
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
|
||||
@ -611,3 +611,90 @@ define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: compr1
|
||||
; CHECK: vcompresspd %zmm0
|
||||
define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) {
|
||||
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: compr2
|
||||
; CHECK: vcompresspd %ymm0
|
||||
define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) {
|
||||
call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: compr3
|
||||
; CHECK: vcompressps %xmm0
|
||||
define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) {
|
||||
call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: compr4
|
||||
; CHECK: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
|
||||
define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) {
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: compr5
|
||||
; CHECK: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
|
||||
define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: compr6
|
||||
; CHECK: vcompressps %xmm0
|
||||
define <4 x float> @compr6(<4 x float> %data, i8 %mask) {
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: compr7
|
||||
; CHECK-NOT: vcompress
|
||||
; CHECK: vmovapd
|
||||
define void @compr7(i8* %addr, <8 x double> %data) {
|
||||
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: compr8
|
||||
; CHECK-NOT: vcompressps %xmm0
|
||||
define <4 x float> @compr8(<4 x float> %data) {
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: compr9
|
||||
; CHECK: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
|
||||
define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) {
|
||||
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: compr10
|
||||
; CHECK: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
|
||||
define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
|
||||
|
Loading…
x
Reference in New Issue
Block a user