// Patch summary: moves the handling of the masked AVX-512 valign intrinsics
// (x86_avx512_mask_valign_q_512 and x86_avx512_mask_valign_d_512) out of
// TableGen patterns and into C++ lowering.
//
// lib/Target/X86/X86ISelLowering.cpp (LowerINTRINSIC_WO_CHAIN): adds a case
//   for both intrinsics that returns an ISD::VSELECT of the result type whose
//     - condition is operand 5 bitcast to a vector of i1 with the same number
//       of elements as the result (presumably the intrinsic's mask argument),
//     - true value is an X86ISD::VALIGN node built from operands 2, 1 and 3,
//       i.e. with the two vector sources swapped relative to operand order,
//     - false value is operand 4.
//   The assert only checks that the computed mask type is a simple type.
//
// lib/Target/X86/X86InstrAVX512.td (avx512_valign): removes the
//   "Non-masking intrinsic call" and "Masking intrinsic call" patterns that
//   previously matched these intrinsics directly.
//
// test/CodeGen/X86/avx512-intrinsics.ll: adds test_maskz_valign_d, which calls
//   llvm.x86.avx512.mask.valign.d.512 with immediate 5, a zeroinitializer
//   fourth argument and an i16 mask, and checks for
//   "valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z}" plus its encoding bytes.
//
// NOTE(review): the original line breaks of this patch have been collapsed,
// and the bare "!cast(" forms and the truncated hunk header
// "multiclass avx512_valign(NAME##rri)" look like angle-bracketed text was
// lost in extraction -- confirm against the upstream commit before applying.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f7e4ac97542..2a8580584f4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14412,6 +14412,22 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case Intrinsic::x86_avx_sqrt_pd_256: return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1)); + case Intrinsic::x86_avx512_mask_valign_q_512: + case Intrinsic::x86_avx512_mask_valign_d_512: { + EVT VT = Op.getValueType(); + EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), + MVT::i1, VT.getVectorNumElements()); + assert(MaskVT.isSimple() && "invalid valign mask type"); + // Vector source operands are swapped. + return DAG.getNode(ISD::VSELECT, dl, VT, + DAG.getNode(ISD::BITCAST, dl, MaskVT, + Op.getOperand(5)), + DAG.getNode(X86ISD::VALIGN, dl, VT, + Op.getOperand(2), Op.getOperand(1), + Op.getOperand(3)), + Op.getOperand(4)); + } + // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 3383654ef61..02be95f48da 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4509,22 +4509,6 @@ multiclass avx512_valign(NAME##rri) RC:$src2, RC:$src1, imm:$imm)>; - // Non-masking intrinsic call. - def : Pat<(IntVT - (!cast("int_x86_avx512_mask_valign_"##Suffix##"_512") - RC:$src1, RC:$src2, imm:$src3, - (IntVT (bitconvert (v16i32 immAllZerosV))), -1)), - (!cast(NAME#rri) RC:$src1, RC:$src2, imm:$src3)>; - - // Masking intrinsic call. 
- def : Pat<(IntVT - (!cast("int_x86_avx512_mask_valign_"##Suffix##"_512") - RC:$src1, RC:$src2, imm:$src3, - RC:$src4, MRC:$mask)), - (!cast(NAME#rrik) RC:$src4, - (COPY_TO_REGCLASS MRC:$mask, KRC), RC:$src1, - RC:$src2, imm:$src3)>; - let mayLoad = 1 in def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 48ec3c00229..a586f32c5d4 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -627,3 +627,12 @@ define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, } declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8) + +define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { +; CHECK-LABEL: test_maskz_valign_d: +; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05] + %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)