diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6a6b20e81d9..1a0b62970cf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6213,7 +6213,8 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, if (!IsLoad) return SDValue(); - if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) + if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || + (Subtarget->hasVLX() && ScalarSize == 64)) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit so it doesn't match diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index d745ba68bb2..0af4a0a2606 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -65,6 +65,7 @@ enum { TB_INDEX_1 = 1, TB_INDEX_2 = 2, TB_INDEX_3 = 3, + TB_INDEX_4 = 4, TB_INDEX_MASK = 0xf, // Do not insert the reverse map (MemOp -> RegOp) into the table. @@ -1337,6 +1338,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, + // AVX-512{F,VL} foldable instructions + { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 }, + { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 }, + { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 }, + { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 }, + // AES foldable instructions { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 }, @@ -1521,7 +1528,46 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE } + { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }, + // AVX-512 arithmetic instructions + { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 }, + { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 }, + { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 }, + { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, + { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 }, + { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 }, + { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 }, + { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 }, + { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 }, + { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 }, + { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 }, + { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 }, + // AVX-512{F,VL} arithmetic instructions 256-bit + { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 }, + { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 }, + { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 }, + { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 }, + { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 }, + { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 }, + { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 }, + { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 }, + { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 }, + { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 }, + { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 }, + { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 }, + // AVX-512{F,VL} arithmetic instructions 128-bit + { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 }, + { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 }, + { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 }, + { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 }, + { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 }, + { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 }, + { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 }, + { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 }, + { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 }, + { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 }, + { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 }, + { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { @@ -1534,6 +1580,57 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } + static const X86OpTblEntry OpTbl4[] = { + // AVX-512 foldable instructions + { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 }, + { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 }, + { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 }, + { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 }, + { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 }, + { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 }, + { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 }, + { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 }, + { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 }, + { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 }, + { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 }, + { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 }, + // AVX-512{F,VL} foldable instructions 256-bit + { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 }, + { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 }, + { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 }, + { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 }, + { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 }, + { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 }, + { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 }, + { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 }, + { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 }, + { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 }, + { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 }, + { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 }, + // AVX-512{F,VL} foldable instructions 128-bit + { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 }, + { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 }, + { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 }, + { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 }, + { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 }, + { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 }, + { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 }, + { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 }, + { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 }, + { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 }, + { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 }, + { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 } + }; + + for (unsigned i = 0, e = array_lengthof(OpTbl4); i != e; ++i) { + unsigned RegOp = OpTbl4[i].RegOp; + unsigned MemOp = OpTbl4[i].MemOp; + unsigned Flags = OpTbl4[i].Flags; + AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, + RegOp, MemOp, + // Index 4, folded load + Flags | TB_INDEX_4 | TB_FOLDED_LOAD); + } } void @@ -4249,6 +4346,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2; } else if (i == 3) { OpcodeTablePtr = &RegOp2MemOpTable3; + } else if (i == 4) { + OpcodeTablePtr = &RegOp2MemOpTable4; } // If table selected... diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 57b19589545..5662e86932c 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -152,6 +152,7 @@ class X86InstrInfo final : public X86GenInstrInfo { RegOp2MemOpTableType RegOp2MemOpTable1; RegOp2MemOpTableType RegOp2MemOpTable2; RegOp2MemOpTableType RegOp2MemOpTable3; + RegOp2MemOpTableType RegOp2MemOpTable4; /// MemOp2RegOpTable - Load / store unfolding opcode map. /// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3093f0bc044..6d98d95584e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3009,6 +3009,7 @@ let Predicates = [HasAVX1Only] in { /// classes below multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, SDNode OpNode, SizeItins itins> { + let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed, PS, VEX_4V; @@ -3022,6 +3023,7 @@ multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, defm V#NAME#PDY : sse12_fp_packed, PD, VEX_4V, VEX_L; + } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed %p1, %c ret <8 x i64>%d } + +; CHECK-LABEL: test_mask_vaddps +; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, + <16 x float> %j, <16 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = fadd <16 x float> %i, %j + %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst + ret <16 x float> %r +} + +; CHECK-LABEL: test_mask_vmulps +; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, + <16 x float> %j, <16 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = fmul <16 x float> %i, %j + %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst + ret <16 x float> %r +} + +; CHECK-LABEL: test_mask_vminps +; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, + <16 x float> %j, <16 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %cmp_res = fcmp olt <16 x float> %i, %j + %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j + %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst + ret <16 x float> %r +} + +; CHECK-LABEL: test_mask_vminpd +; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, + <8 x double> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %cmp_res = fcmp olt <8 x double> %i, %j + %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j + %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst + ret <8 x double> %r +} + +; CHECK-LABEL: test_mask_vmaxps +; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, + <16 x float> %j, <16 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %cmp_res = fcmp ogt <16 x float> %i, %j + %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j + %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst + ret <16 x float> %r +} + +; CHECK-LABEL: test_mask_vmaxpd +; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, + <8 x double> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %cmp_res = fcmp ogt <8 x double> %i, %j + %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j + %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst + ret <8 x double> %r +} + +; CHECK-LABEL: test_mask_vsubps +; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, + <16 x float> %j, <16 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = fsub <16 x float> %i, %j + %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst + ret <16 x float> %r +} + +; CHECK-LABEL: test_mask_vdivps +; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, + <16 x float> %j, <16 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = fdiv <16 x float> %i, %j + %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst + ret <16 x float> %r +} + +; CHECK-LABEL: test_mask_vaddpd +; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, + <8 x double> %j, <8 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %x = fadd <8 x double> %i, %j + %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst + ret <8 x double> %r +} + +; CHECK-LABEL: test_maskz_vaddpd +; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}} +; CHECK: ret +define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, + <8 x i64> %mask1) nounwind readnone { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %x = fadd <8 x double> %i, %j + %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer + ret <8 x double> %r +} + +; CHECK-LABEL: test_mask_fold_vaddpd +; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}} +; CHECK: ret +define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, + <8 x double>* %j, <8 x i64> %mask1) + nounwind { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %tmp = load <8 x double>* %j, align 8 + %x = fadd <8 x double> %i, %tmp + %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst + ret <8 x double> %r +} + +; CHECK-LABEL: test_maskz_fold_vaddpd +; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}} +; CHECK: ret +define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, + <8 x i64> %mask1) nounwind { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %tmp = load <8 x double>* %j, align 8 + %x = fadd <8 x double> %i, %tmp + %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer + ret <8 x double> %r +} + +; CHECK-LABEL: test_broadcast_vaddpd +; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}} +; CHECK: ret +define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind { + %tmp = load double* %j + %b = insertelement <8 x double> undef, double %tmp, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, + <8 x i32> zeroinitializer + %x = fadd <8 x double> %c, %i + ret <8 x double> %x +} + +; CHECK-LABEL: test_mask_broadcast_vaddpd +; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}} +; CHECK: ret +define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, + double* %j, <8 x i64> %mask1) nounwind { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %tmp = load double* %j + %b = insertelement <8 x double> undef, double %tmp, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, + <8 x i32> zeroinitializer + %x = fadd <8 x double> %c, %i + %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i + ret <8 x double> %r +} + +; CHECK-LABEL: test_maskz_broadcast_vaddpd +; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}} +; CHECK: ret +define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, + <8 x i64> %mask1) nounwind { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %tmp = load double* %j + %b = insertelement <8 x double> undef, double %tmp, i32 0 + %c = shufflevector <8 x double> %b, <8 x double> undef, + <8 x i32> zeroinitializer + %x = fadd <8 x double> %c, %i + %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer + ret <8 x double> %r +} diff --git a/test/CodeGen/X86/avx512vl-arith.ll b/test/CodeGen/X86/avx512vl-arith.ll index e6fb9aeed2e..1f7da7814cc 100644 --- a/test/CodeGen/X86/avx512vl-arith.ll +++ b/test/CodeGen/X86/avx512vl-arith.ll @@ -149,6 +149,258 @@ define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) { ret <8 x i32> %x } +; CHECK-LABEL: test_vaddpd_256 +; CHECK: vaddpd{{.*}} +; CHECK: ret +define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) { +entry: + %add.i = fadd <4 x double> %x, %y + ret <4 x double> %add.i +} + +; CHECK-LABEL: test_fold_vaddpd_256 +; CHECK: vaddpd LCP{{.*}}(%rip){{.*}} +; CHECK: ret +define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) { +entry: + %add.i = fadd <4 x double> %y, + ret <4 x double> %add.i +} + +; CHECK-LABEL: test_broadcast_vaddpd_256 +; CHECK: LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0 +; CHECK: ret +define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind { + %b = fadd <8 x float> %a, + ret <8 x float> %b +} + +; CHECK-LABEL: test_mask_vaddps_256 +; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i, + <8 x float> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %x = fadd <8 x float> %i, %j + %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst + ret <8 x float> %r +} + +; CHECK-LABEL: test_mask_vmulps_256 +; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i, + <8 x float> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %x = fmul <8 x float> %i, %j + %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst + ret <8 x float> %r +} + +; CHECK-LABEL: test_mask_vminps_256 +; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i, + <8 x float> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %cmp_res = fcmp olt <8 x float> %i, %j + %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j + %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst + ret <8 x float> %r +} + +; CHECK-LABEL: test_mask_vmaxps_256 +; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i, + <8 x float> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %cmp_res = fcmp ogt <8 x float> %i, %j + %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j + %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst + ret <8 x float> %r +} + +; CHECK-LABEL: test_mask_vsubps_256 +; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i, + <8 x float> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %x = fsub <8 x float> %i, %j + %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst + ret <8 x float> %r +} + +; CHECK-LABEL: test_mask_vdivps_256 +; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i, + <8 x float> %j, <8 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <8 x i32> %mask1, zeroinitializer + %x = fdiv <8 x float> %i, %j + %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst + ret <8 x float> %r +} + +; CHECK-LABEL: test_mask_vmulpd_256 +; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i, + <4 x double> %j, <4 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %x = fmul <4 x double> %i, %j + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst + ret <4 x double> %r +} + +; CHECK-LABEL: test_mask_vminpd_256 +; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i, + <4 x double> %j, <4 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %cmp_res = fcmp olt <4 x double> %i, %j + %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j + %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst + ret <4 x double> %r +} + +; CHECK-LABEL: test_mask_vmaxpd_256 +; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i, + <4 x double> %j, <4 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %cmp_res = fcmp ogt <4 x double> %i, %j + %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j + %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst + ret <4 x double> %r +} + +; CHECK-LABEL: test_mask_vsubpd_256 +; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i, + <4 x double> %j, <4 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %x = fsub <4 x double> %i, %j + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst + ret <4 x double> %r +} + +; CHECK-LABEL: test_mask_vdivpd_256 +; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i, + <4 x double> %j, <4 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %x = fdiv <4 x double> %i, %j + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst + ret <4 x double> %r +} + +; CHECK-LABEL: test_mask_vaddpd_256 +; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i, + <4 x double> %j, <4 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %x = fadd <4 x double> %i, %j + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst + ret <4 x double> %r +} + +; CHECK-LABEL: test_maskz_vaddpd_256 +; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}} +; CHECK: ret +define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j, + <4 x i64> %mask1) nounwind readnone { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %x = fadd <4 x double> %i, %j + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer + ret <4 x double> %r +} + +; CHECK-LABEL: test_mask_fold_vaddpd_256 +; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}} +; CHECK: ret +define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i, + <4 x double>* %j, <4 x i64> %mask1) + nounwind { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %tmp = load <4 x double>* %j + %x = fadd <4 x double> %i, %tmp + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst + ret <4 x double> %r +} + +; CHECK-LABEL: test_maskz_fold_vaddpd_256 +; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}} +; CHECK: ret +define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j, + <4 x i64> %mask1) nounwind { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %tmp = load <4 x double>* %j + %x = fadd <4 x double> %i, %tmp + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer + ret <4 x double> %r +} + +; CHECK-LABEL: test_broadcast2_vaddpd_256 +; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}} +; CHECK: ret +define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind { + %tmp = load double* %j + %b = insertelement <4 x double> undef, double %tmp, i32 0 + %c = shufflevector <4 x double> %b, <4 x double> undef, + <4 x i32> zeroinitializer + %x = fadd <4 x double> %c, %i + ret <4 x double> %x +} + +; CHECK-LABEL: test_mask_broadcast_vaddpd_256 +; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}} +; CHECK: ret +define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, + double* %j, <4 x i64> %mask1) nounwind { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %tmp = load double* %j + %b = insertelement <4 x double> undef, double %tmp, i32 0 + %c = shufflevector <4 x double> %b, <4 x double> undef, + <4 x i32> zeroinitializer + %x = fadd <4 x double> %c, %i + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i + ret <4 x double> %r +} + +; CHECK-LABEL: test_maskz_broadcast_vaddpd_256 +; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}} +; CHECK: ret +define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j, + <4 x i64> %mask1) nounwind { + %mask = icmp ne <4 x i64> %mask1, zeroinitializer + %tmp = load double* %j + %b = insertelement <4 x double> undef, double %tmp, i32 0 + %c = shufflevector <4 x double> %b, <4 x double> undef, + <4 x i32> zeroinitializer + %x = fadd <4 x double> %c, %i + %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer + ret <4 x double> %r +} + ; 128-bit ; CHECK-LABEL: vpaddq128_test @@ -289,3 +541,254 @@ define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) { %x = mul <4 x i32> %i, %j ret <4 x i32> %x } + +; CHECK-LABEL: test_vaddpd_128 +; CHECK: vaddpd{{.*}} +; CHECK: ret +define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) { +entry: + %add.i = fadd <2 x double> %x, %y + ret <2 x double> %add.i +} + +; CHECK-LABEL: test_fold_vaddpd_128 +; CHECK: vaddpd LCP{{.*}}(%rip){{.*}} +; CHECK: ret +define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) { +entry: + %add.i = fadd <2 x double> %y, + ret <2 x double> %add.i +} + +; CHECK-LABEL: test_broadcast_vaddpd_128 +; CHECK: LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0 +; CHECK: ret +define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind { + %b = fadd <4 x float> %a, + ret <4 x float> %b +} + +; CHECK-LABEL: test_mask_vaddps_128 +; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i, + <4 x float> %j, <4 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i32> %mask1, zeroinitializer + %x = fadd <4 x float> %i, %j + %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst + ret <4 x float> %r +} + +; CHECK-LABEL: test_mask_vmulps_128 +; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i, + <4 x float> %j, <4 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i32> %mask1, zeroinitializer + %x = fmul <4 x float> %i, %j + %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst + ret <4 x float> %r +} + +; CHECK-LABEL: test_mask_vminps_128 +; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i, + <4 x float> %j, <4 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i32> %mask1, zeroinitializer + %cmp_res = fcmp olt <4 x float> %i, %j + %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j + %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst + ret <4 x float> %r +} + +; CHECK-LABEL: test_mask_vmaxps_128 +; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i, + <4 x float> %j, <4 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i32> %mask1, zeroinitializer + %cmp_res = fcmp ogt <4 x float> %i, %j + %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j + %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst + ret <4 x float> %r +} + +; CHECK-LABEL: test_mask_vsubps_128 +; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i, + <4 x float> %j, <4 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i32> %mask1, zeroinitializer + %x = fsub <4 x float> %i, %j + %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst + ret <4 x float> %r +} + + +; CHECK-LABEL: test_mask_vdivps_128 +; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i, + <4 x float> %j, <4 x i32> %mask1) + nounwind readnone { + %mask = icmp ne <4 x i32> %mask1, zeroinitializer + %x = fdiv <4 x float> %i, %j + %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst + ret <4 x float> %r +} + +; CHECK-LABEL: test_mask_vmulpd_128 +; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i, + <2 x double> %j, <2 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %x = fmul <2 x double> %i, %j + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst + ret <2 x double> %r +} + +; CHECK-LABEL: test_mask_vminpd_128 +; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i, + <2 x double> %j, <2 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %cmp_res = fcmp olt <2 x double> %i, %j + %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j + %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst + ret <2 x double> %r +} + +; CHECK-LABEL: test_mask_vmaxpd_128 +; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i, + <2 x double> %j, <2 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %cmp_res = fcmp ogt <2 x double> %i, %j + %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j + %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst + ret <2 x double> %r +} + +; CHECK-LABEL: test_mask_vsubpd_128 +; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i, + <2 x double> %j, <2 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %x = fsub <2 x double> %i, %j + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst + ret <2 x double> %r +} + +; CHECK-LABEL: test_mask_vdivpd_128 +; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i, + <2 x double> %j, <2 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %x = fdiv <2 x double> %i, %j + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst + ret <2 x double> %r +} + +; CHECK-LABEL: test_mask_vaddpd_128 +; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}} +; CHECK: ret +define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i, + <2 x double> %j, <2 x i64> %mask1) + nounwind readnone { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %x = fadd <2 x double> %i, %j + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst + ret <2 x double> %r +} + +; CHECK-LABEL: test_maskz_vaddpd_128 +; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}} +; CHECK: ret +define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j, + <2 x i64> %mask1) nounwind readnone { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %x = fadd <2 x double> %i, %j + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer + ret <2 x double> %r +} + +; CHECK-LABEL: test_mask_fold_vaddpd_128 +; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}} +; CHECK: ret +define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i, + <2 x double>* %j, <2 x i64> %mask1) + nounwind { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %tmp = load <2 x double>* %j + %x = fadd <2 x double> %i, %tmp + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst + ret <2 x double> %r +} + +; CHECK-LABEL: test_maskz_fold_vaddpd_128 +; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}} +; CHECK: ret +define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j, + <2 x i64> %mask1) nounwind { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %tmp = load <2 x double>* %j + %x = fadd <2 x double> %i, %tmp + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer + ret <2 x double> %r +} + +; CHECK-LABEL: test_broadcast2_vaddpd_128 +; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}} +; CHECK: ret +define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind { + %tmp = load double* %j + %j.0 = insertelement <2 x double> undef, double %tmp, i64 0 + %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1 + %x = fadd <2 x double> %j.1, %i + ret <2 x double> %x +} + +; CHECK-LABEL: test_mask_broadcast_vaddpd_128 +; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}} +; CHECK: ret +define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, + double* %j, <2 x i64> %mask1) + nounwind { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %tmp = load double* %j + %j.0 = insertelement <2 x double> undef, double %tmp, i64 0 + %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1 + %x = fadd <2 x double> %j.1, %i + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i + ret <2 x double> %r +} + +; CHECK-LABEL: test_maskz_broadcast_vaddpd_128 +; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}} +; CHECK: ret +define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j, + <2 x i64> %mask1) nounwind { + %mask = icmp ne <2 x i64> %mask1, zeroinitializer + %tmp = load double* %j + %j.0 = insertelement <2 x double> undef, double %tmp, i64 0 + %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1 + %x = fadd <2 x double> %j.1, %i + %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer + ret <2 x double> %r +}