AVX-512: Added EXPAND instructions and intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224241 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2014-12-15 10:03:52 +00:00
parent 0bf492d3c1
commit 3f2027522c
6 changed files with 343 additions and 15 deletions

View File

@ -3584,6 +3584,108 @@ let TargetPrefix = "x86" in {
GCCBuiltin<"__builtin_ia32_compressstoredi128_mask">,
Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrReadWriteArgMem]>;
// expand
// Register-form masked expand intrinsics for floating-point vectors
// (512/256/128-bit). Each takes two vectors of the result type followed by an
// integer mask and is declared IntrNoMem. The mask is i16 for the 16-element
// type and i8 for every narrower element count.
def int_x86_avx512_mask_expand_ps_512 :
GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_pd_512 :
GCCBuiltin<"__builtin_ia32_expanddf512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_ps_256 :
GCCBuiltin<"__builtin_ia32_expandsf256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_pd_256 :
GCCBuiltin<"__builtin_ia32_expanddf256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_ps_128 :
GCCBuiltin<"__builtin_ia32_expandsf128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_pd_128 :
GCCBuiltin<"__builtin_ia32_expanddf128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
// Memory-form masked expand (expand-load) intrinsics for floating-point
// vectors. Operands are a pointer, a vector of the result type, and an integer
// mask (i16 for the 16-element type, i8 otherwise). They are declared
// IntrReadArgMem because the source data is read through the pointer operand.
def int_x86_avx512_mask_expand_load_ps_512 :
GCCBuiltin<"__builtin_ia32_expandloadsf512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_pd_512 :
GCCBuiltin<"__builtin_ia32_expandloaddf512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_ps_256 :
GCCBuiltin<"__builtin_ia32_expandloadsf256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_pd_256 :
GCCBuiltin<"__builtin_ia32_expandloaddf256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_ps_128 :
GCCBuiltin<"__builtin_ia32_expandloadsf128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_pd_128 :
GCCBuiltin<"__builtin_ia32_expandloaddf128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrReadArgMem]>;
// Register-form masked expand intrinsics for integer vectors (32-bit "d" and
// 64-bit "q" elements, 512/256/128-bit). Same operand layout as the
// floating-point register forms: two vectors of the result type plus an
// integer mask (i16 for the 16-element type, i8 otherwise); IntrNoMem.
def int_x86_avx512_mask_expand_d_512 :
GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_q_512 :
GCCBuiltin<"__builtin_ia32_expanddi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_d_256 :
GCCBuiltin<"__builtin_ia32_expandsi256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_q_256 :
GCCBuiltin<"__builtin_ia32_expanddi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_d_128 :
GCCBuiltin<"__builtin_ia32_expandsi128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_q_128 :
GCCBuiltin<"__builtin_ia32_expanddi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
// Memory-form masked expand (expand-load) intrinsics for integer vectors.
// Operands are a pointer, a vector of the result type, and an integer mask
// (i16 for the 16-element type, i8 otherwise); declared IntrReadArgMem.
def int_x86_avx512_mask_expand_load_d_512 :
GCCBuiltin<"__builtin_ia32_expandloadsi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty,
llvm_i16_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_q_512 :
GCCBuiltin<"__builtin_ia32_expandloaddi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_d_256 :
GCCBuiltin<"__builtin_ia32_expandloadsi256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_q_256 :
GCCBuiltin<"__builtin_ia32_expandloaddi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_d_128 :
GCCBuiltin<"__builtin_ia32_expandloadsi128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrReadArgMem]>;
def int_x86_avx512_mask_expand_load_q_128 :
GCCBuiltin<"__builtin_ia32_expandloaddi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrReadArgMem]>;
}
// Misc.
let TargetPrefix = "x86" in {

View File

@ -16963,7 +16963,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Op.getOperand(2), DAG),
Op.getOperand(4), Op.getOperand(3), Subtarget,
DAG);
case COMPRESS_TO_REG: {
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
@ -17524,6 +17524,34 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getStore(Chain, dl, Compressed, Addr,
MachinePointerInfo(), false, false, 0);
}
case EXPAND_FROM_MEM: {
  // Lower a masked expand-load intrinsic.
  // Operand layout: 0 = chain, 2 = address, 3 = pass-through vector,
  // 4 = integer mask.
  SDLoc dl(Op);
  SDValue Mask = Op.getOperand(4);
  SDValue PassThru = Op.getOperand(3);
  SDValue Addr = Op.getOperand(2);
  SDValue Chain = Op.getOperand(0);
  EVT VT = Op.getValueType();

  // Both paths read the source data with the same plain vector load, so build
  // it once.
  SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
                                     false, false, false, 0);
  if (isAllOnes(Mask)) // return just a load
    return DataToExpand;

  // Reinterpret the integer mask as a vector of i1 and keep only as many low
  // bits as the data vector has elements.
  EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                VT.getVectorNumElements());
  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                   Mask.getValueType().getSizeInBits());
  SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
                              DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
                              DAG.getIntPtrConstant(0));

  SmallVector<SDValue, 2> Results;
  Results.push_back(DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand,
                                PassThru));
  // Hand back the load's output chain, not the incoming one, so that users of
  // this intrinsic's chain result stay ordered after the memory read. The
  // all-ones path above already does this by returning the load node itself.
  Results.push_back(DataToExpand.getValue(1));
  return DAG.getMergeValues(Results, dl);
}
}
}
@ -19710,6 +19738,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
}
}

View File

@ -5323,3 +5323,58 @@ defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info
// Double-precision compress: opcode 0x8A over the f64 vector-length family,
// with VEX_W set for the 64-bit element size.
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
EVEX, VEX_W;
// expand
// Defines the four masked forms of one EXPAND opcode for a single vector
// type `_`:
//   rrkz - register source, zero-masking   (pass-through is ImmAllZerosV)
//   rrk  - register source, merge-masking  ($src0 tied to $dst)
//   rmk  - memory source,   merge-masking  ($src0 tied to $dst)
//   rmkz - memory source,   zero-masking   (pass-through is ImmAllZerosV)
// Every pattern selects X86expand with operands (mask, source, pass-through).
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
// Register source, zero-masking.
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
_.ImmAllZerosV)))]>, EVEX_KZ;
// Register source, merge-masking: $src0 is the pass-through value and is
// constrained to the destination register.
let Constraints = "$src0 = $dst" in
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
(_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;
// Memory source, merge-masking: the source is a folded load (_.LdFrag) and
// the displacement scaling is per element (CD8VT1 with _.EltSize).
let mayLoad = 1, Constraints = "$src0 = $dst" in
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
(_.VT (bitconvert
(_.LdFrag addr:$src))),
_.RC:$src0)))]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
// Memory source, zero-masking.
let mayLoad = 1 in
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
(_.VT (bitconvert (_.LdFrag addr:$src))),
_.ImmAllZerosV)))]>,
EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
}
// Instantiates expand_by_vec_width for every vector length of one element
// type: the 512-bit form (Z) unconditionally, and the 256-bit (Z256) and
// 128-bit (Z128) forms only under the HasVLX predicate.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
}
// Concrete EXPAND instruction families. The integer forms use opcode 0x89 and
// the floating-point forms 0x88; the 64-bit element variants additionally set
// VEX_W.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>,
EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>,
EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
EVEX, VEX_W;

View File

@ -286,6 +286,9 @@ def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
// DAG node for X86ISD::COMPRESS: one result and three operands. The result
// has the same type as operands 2 and 3; operands 3 and 1 are vectors, and
// operand 1 has an integer type.
def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
// DAG node for X86ISD::EXPAND: one result and three operands. The result has
// the same type as operand 3; operands 3 and 1 are vectors, and operand 1 has
// an integer type. Unlike X86compress, no SDTCisSameAs<0, 2> constraint ties
// operand 2 to the result type.
def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 3>,
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns

View File

@ -22,7 +22,7 @@ enum IntrinsicType {
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
COMPRESS_TO_REG, COMPRESS_TO_MEM
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM
};
struct IntrinsicData {
@ -95,7 +95,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
@ -233,30 +256,55 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_TO_REG,
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_pd_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_pd_256, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_pd_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_ps_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_ps_256, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_ps_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_256, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),

View File

@ -698,3 +698,94 @@ define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
}
declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
; Expand
; 512-bit double expand-load with a variable mask: expects the merge-masked
; memory form reading from the pointer argument.
; CHECK-LABEL: expand1
; CHECK: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
; 256-bit double expand-load with a variable mask: expects the merge-masked
; memory form on a ymm register.
; CHECK-LABEL: expand2
; CHECK: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) {
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
; 128-bit float expand-load with a variable mask: expects the merge-masked
; memory form on an xmm register.
; CHECK-LABEL: expand3
; CHECK: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) {
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
; 512-bit double register expand with a zeroinitializer pass-through: expects
; the zero-masking ({z}) register form. The %addr parameter is not used by the
; function body.
; CHECK-LABEL: expand4
; CHECK: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) {
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
; 256-bit double register expand with an explicit pass-through %src0: expects
; the merge-masking register form writing into the pass-through register.
; CHECK-LABEL: expand5
; CHECK: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
%res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
; 128-bit float register expand with a zeroinitializer pass-through and a
; variable mask: only verifies that an expand of %xmm0 is emitted.
; CHECK-LABEL: expand6
; CHECK: vexpandps %xmm0
define <4 x float> @expand6(<4 x float> %data, i8 %mask) {
%res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
; Expand-load with a constant all-ones mask (i8 -1): expects a plain vector
; move from memory and no expand instruction.
; CHECK-LABEL: expand7
; CHECK-NOT: vexpand
; CHECK: vmovapd
define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
}
; Register expand with a constant all-ones mask (i8 -1): no expand instruction
; of any kind should be emitted. The negative pattern is deliberately broad,
; matching the all-ones load test expand7, so that an expand using a different
; mnemonic or register is still rejected.
; CHECK-LABEL: expand8
; CHECK-NOT: vexpand
define <4 x float> @expand8(<4 x float> %data) {
%res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
ret <4 x float> %res
}
; 512-bit i64 expand-load with a variable mask: expects the merge-masked
; memory form of the integer instruction.
; CHECK-LABEL: expand9
; CHECK: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) {
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
; 128-bit i32 register expand with a zeroinitializer pass-through: expects the
; zero-masking ({z}) register form of the integer instruction.
; CHECK-LABEL: expand10
; CHECK: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)