mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-22 13:29:44 +00:00
AVX-512: Added EXPAND instructions and intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224241 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0bf492d3c1
commit
3f2027522c
@ -3584,6 +3584,108 @@ let TargetPrefix = "x86" in {
|
||||
GCCBuiltin<"__builtin_ia32_compressstoredi128_mask">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty,
|
||||
llvm_i8_ty], [IntrReadWriteArgMem]>;
|
||||
|
||||
// expand
|
||||
def int_x86_avx512_mask_expand_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expanddf512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expandsf256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expanddf256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expandsf128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expanddf128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_expand_load_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloadsf512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloaddf512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloadsf256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloaddf256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloadsf128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloaddf128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
|
||||
def int_x86_avx512_mask_expand_d_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_q_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expanddi512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_d_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expandsi256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_q_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expanddi256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_d_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expandsi128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_expand_q_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expanddi128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_expand_load_d_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloadsi512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty,
|
||||
llvm_i16_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_q_512 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloaddi512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_d_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloadsi256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_q_256 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloaddi256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_d_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloadsi128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx512_mask_expand_load_q_128 :
|
||||
GCCBuiltin<"__builtin_ia32_expandloaddi128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty,
|
||||
llvm_i8_ty], [IntrReadArgMem]>;
|
||||
|
||||
}
|
||||
// Misc.
|
||||
let TargetPrefix = "x86" in {
|
||||
|
@ -16963,7 +16963,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
Op.getOperand(2), DAG),
|
||||
Op.getOperand(4), Op.getOperand(3), Subtarget,
|
||||
DAG);
|
||||
case COMPRESS_TO_REG: {
|
||||
case COMPRESS_EXPAND_IN_REG: {
|
||||
SDValue Mask = Op.getOperand(3);
|
||||
SDValue DataToCompress = Op.getOperand(1);
|
||||
SDValue PassThru = Op.getOperand(2);
|
||||
@ -17524,6 +17524,34 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
|
||||
return DAG.getStore(Chain, dl, Compressed, Addr,
|
||||
MachinePointerInfo(), false, false, 0);
|
||||
}
|
||||
case EXPAND_FROM_MEM: {
  // Lower a masked expand-load intrinsic: load a vector from memory and
  // expand it under a mask, merging with a pass-through vector.
  // Operands read here: 0 = chain, 2 = address, 3 = pass-through, 4 = mask.
  SDLoc dl(Op);
  SDValue Mask = Op.getOperand(4);
  SDValue PassThru = Op.getOperand(3);
  SDValue Addr = Op.getOperand(2);
  SDValue Chain = Op.getOperand(0);
  EVT VT = Op.getValueType();

  if (isAllOnes(Mask)) // return just a load
    return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
                       false, 0);

  // Reinterpret the integer mask as a vector of i1 and keep only the low
  // VT.getVectorNumElements() bits, one per element of the result vector.
  EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                VT.getVectorNumElements());
  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                   Mask.getValueType().getSizeInBits());
  SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
                              DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
                              DAG.getIntPtrConstant(0));

  SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
                                     false, false, false, 0);

  SmallVector<SDValue, 2> Results;
  Results.push_back(DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand,
                                PassThru));
  // Hand back the load's output chain (result #1 of the load node), not the
  // incoming chain, so that users of this intrinsic's chain stay ordered
  // after the memory read.
  Results.push_back(DataToExpand.getValue(1));
  return DAG.getMergeValues(Results, dl);
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -19710,6 +19738,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
|
||||
case X86ISD::XTEST: return "X86ISD::XTEST";
|
||||
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
|
||||
case X86ISD::EXPAND: return "X86ISD::EXPAND";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5323,3 +5323,58 @@ defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info
|
||||
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
|
||||
EVEX, VEX_W;
|
||||
|
||||
// expand
|
||||
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
|
||||
string OpcodeStr> {
|
||||
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src),
|
||||
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
|
||||
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
|
||||
_.ImmAllZerosV)))]>, EVEX_KZ;
|
||||
|
||||
let Constraints = "$src0 = $dst" in
|
||||
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
|
||||
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
|
||||
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
|
||||
(_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;
|
||||
|
||||
let mayLoad = 1, Constraints = "$src0 = $dst" in
|
||||
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
|
||||
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
|
||||
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
|
||||
(_.VT (bitconvert
|
||||
(_.LdFrag addr:$src))),
|
||||
_.RC:$src0)))]>,
|
||||
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
|
||||
|
||||
let mayLoad = 1 in
|
||||
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.MemOp:$src),
|
||||
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
|
||||
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src))),
|
||||
_.ImmAllZerosV)))]>,
|
||||
EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
|
||||
|
||||
}
|
||||
|
||||
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
|
||||
AVX512VLVectorVTInfo VTInfo> {
|
||||
defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
|
||||
|
||||
let Predicates = [HasVLX] in {
|
||||
defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
|
||||
defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>,
|
||||
EVEX;
|
||||
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>,
|
||||
EVEX, VEX_W;
|
||||
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
|
||||
EVEX;
|
||||
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
|
||||
EVEX, VEX_W;
|
||||
|
@ -286,6 +286,9 @@ def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
|
||||
def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
|
||||
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
|
||||
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
|
||||
def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
|
||||
[SDTCisSameAs<0, 3>,
|
||||
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE Complex Patterns
|
||||
|
@ -22,7 +22,7 @@ enum IntrinsicType {
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
|
||||
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
|
||||
COMPRESS_TO_REG, COMPRESS_TO_MEM
|
||||
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM
|
||||
};
|
||||
|
||||
struct IntrinsicData {
|
||||
@ -95,7 +95,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
|
||||
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_128,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_256,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_512,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_128,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_256,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_512,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_128,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_256,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_512,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_128,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_256,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
|
||||
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
|
||||
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
|
||||
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
|
||||
@ -233,30 +256,55 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_TO_REG,
|
||||
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_d_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_pd_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_pd_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_pd_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_ps_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_ps_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_ps_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_q_128, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_q_256, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
|
@ -698,3 +698,94 @@ define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
|
||||
|
||||
; Expand
|
||||
|
||||
; CHECK-LABEL: expand1
|
||||
; CHECK: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
|
||||
define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: expand2
|
||||
; CHECK: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
|
||||
define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) {
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: expand3
|
||||
; CHECK: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
|
||||
define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) {
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: expand4
|
||||
; CHECK: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
|
||||
define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) {
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: expand5
|
||||
; CHECK: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
|
||||
define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: expand6
|
||||
; CHECK: vexpandps %xmm0
|
||||
define <4 x float> @expand6(<4 x float> %data, i8 %mask) {
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: expand7
|
||||
; CHECK-NOT: vexpand
|
||||
; CHECK: vmovapd
|
||||
define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: expand8
|
||||
; CHECK-NOT: vexpandps %xmm0
|
||||
define <4 x float> @expand8(<4 x float> %data) {
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: expand9
|
||||
; CHECK: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
|
||||
define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) {
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
|
||||
|
||||
; CHECK-LABEL: expand10
|
||||
; CHECK: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
|
||||
define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) {
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user