[AVX512] Enable FP arithmetic lowering for AVX512VL subsets.

Added RegOp2MemOpTable4 so that the 4th operand of merge-masked instructions can be folded from a register into a memory operand.
Added lowering tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224516 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Robert Khasanov 2014-12-18 12:28:22 +00:00
parent e22e2b8798
commit d25d7bb372
6 changed files with 798 additions and 2 deletions

View File

@ -6213,7 +6213,8 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
if (!IsLoad)
return SDValue();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(Subtarget->hasVLX() && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit so it doesn't match

View File

@ -65,6 +65,7 @@ enum {
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
TB_INDEX_3 = 3,
TB_INDEX_4 = 4,
TB_INDEX_MASK = 0xf,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
@ -1337,6 +1338,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
// AVX-512{F,VL} foldable instructions
{ X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
{ X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
{ X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
{ X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
{ X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
@ -1521,7 +1528,46 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }
{ X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
// AVX-512 arithmetic instructions
{ X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
{ X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
{ X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
{ X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
{ X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
{ X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
// AVX-512{F,VL} arithmetic instructions 256-bit
{ X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
{ X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
{ X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
{ X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
{ X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
{ X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
{ X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
{ X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
{ X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
{ X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
{ X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
{ X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
// AVX-512{F,VL} arithmetic instructions 128-bit
{ X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
{ X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
{ X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
{ X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
{ X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
{ X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
{ X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
{ X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
{ X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
{ X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
{ X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
{ X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@ -1534,6 +1580,57 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
// Opcode pairs { register form, memory form, flags } for the merge-masked
// ("rrk" -> "rmk") AVX-512 packed FP add/sub/mul/div/min/max instructions,
// listed for the 512-bit, 256-bit and 128-bit variants. Every entry carries
// flags == 0; the loop that consumes this table ORs in TB_INDEX_4 and
// TB_FOLDED_LOAD when registering each pair.
static const X86OpTblEntry OpTbl4[] = {
// AVX-512 foldable instructions
{ X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
{ X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
{ X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
{ X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
{ X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
{ X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
{ X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
// AVX-512{F,VL} foldable instructions 256-bit
{ X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
{ X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
{ X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
{ X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
{ X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
{ X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
{ X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
{ X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
{ X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
{ X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
{ X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
{ X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
// AVX-512{F,VL} foldable instructions 128-bit
{ X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
{ X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
{ X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
{ X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
{ X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
{ X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
{ X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
{ X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
{ X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
{ X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
{ X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
{ X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
};
// Register every OpTbl4 pair in RegOp2MemOpTable4 (MemOp2RegOpTable is passed
// along as well), tagging each entry as "operand index 4, folded load".
for (const auto &Entry : OpTbl4) {
  const unsigned EntryFlags = Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD;
  AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
                Entry.RegOp, Entry.MemOp, EntryFlags);
}
}
void
@ -4249,6 +4346,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2;
} else if (i == 3) {
OpcodeTablePtr = &RegOp2MemOpTable3;
} else if (i == 4) {
OpcodeTablePtr = &RegOp2MemOpTable4;
}
// If table selected...

View File

@ -152,6 +152,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
RegOp2MemOpTableType RegOp2MemOpTable1;
RegOp2MemOpTableType RegOp2MemOpTable2;
RegOp2MemOpTableType RegOp2MemOpTable3;
RegOp2MemOpTableType RegOp2MemOpTable4;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///

View File

@ -3009,6 +3009,7 @@ let Predicates = [HasAVX1Only] in {
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, SizeItins itins> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
SSEPackedSingle, itins.s, 0>, PS, VEX_4V;
@ -3022,6 +3023,7 @@ multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,

View File

@ -462,3 +462,193 @@ entry:
%d = and <8 x i64> %p1, %c
ret <8 x i64>%d
}
; Merge-masking, 512-bit single precision add: select(%mask, fadd(%i, %j), %dst)
; is expected to become a vaddps on zmm registers carrying a {%k} write-mask.
; CHECK-LABEL: test_mask_vaddps
; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fadd <16 x float> %i, %j
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
ret <16 x float> %r
}
; Merge-masking, 512-bit single precision multiply: the fmul result is blended
; with %dst under %mask; a vmulps on zmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmulps
; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fmul <16 x float> %i, %j
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
ret <16 x float> %r
}
; Merge-masking, 512-bit single precision minimum: the minimum is written as
; fcmp olt + select, then blended with %dst under %mask. A vminps on zmm
; registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vminps
; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%cmp_res = fcmp olt <16 x float> %i, %j
%min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
%r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
ret <16 x float> %r
}
; Merge-masking, 512-bit double precision minimum (fcmp olt + select), blended
; with %dst. Note the mask is derived from an <8 x i32> vector here. A vminpd on
; zmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vminpd
; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
<8 x double> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%cmp_res = fcmp olt <8 x double> %i, %j
%min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
%r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
ret <8 x double> %r
}
; Merge-masking, 512-bit single precision maximum: the maximum is written as
; fcmp ogt + select, then blended with %dst under %mask. A vmaxps on zmm
; registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmaxps
; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <16 x float> %i, %j
%max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
%r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
ret <16 x float> %r
}
; Merge-masking, 512-bit double precision maximum (fcmp ogt + select), blended
; with %dst. The mask is derived from an <8 x i32> vector. A vmaxpd on zmm
; registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmaxpd
; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
<8 x double> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <8 x double> %i, %j
%max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
%r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
ret <8 x double> %r
}
; Merge-masking, 512-bit single precision subtract: the fsub result is blended
; with %dst under %mask; a vsubps on zmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vsubps
; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fsub <16 x float> %i, %j
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
ret <16 x float> %r
}
; Merge-masking, 512-bit single precision divide: the fdiv result is blended
; with %dst under %mask; a vdivps on zmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vdivps
; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fdiv <16 x float> %i, %j
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
ret <16 x float> %r
}
; Merge-masking, 512-bit double precision add: select(%mask, fadd(%i, %j), %dst)
; is expected to become a vaddpd on zmm registers carrying a {%k} write-mask.
; CHECK-LABEL: test_mask_vaddpd
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
<8 x double> %j, <8 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%x = fadd <8 x double> %i, %j
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
ret <8 x double> %r
}
; Zero-masking, 512-bit double precision add: lanes not selected by %mask take
; zeroinitializer, so the vaddpd is expected to carry both {%k} and {z}.
; CHECK-LABEL: test_maskz_vaddpd
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
<8 x i64> %mask1) nounwind readnone {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%x = fadd <8 x double> %i, %j
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}
; Merge-masking with a folded load, 512-bit: the second addend is loaded from
; %j, and the pattern expects that load to appear as the (%rdi) memory operand
; of a {%k}-masked vaddpd rather than as a separate instruction.
; CHECK-LABEL: test_mask_fold_vaddpd
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
<8 x double>* %j, <8 x i64> %mask1)
nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load <8 x double>* %j, align 8
%x = fadd <8 x double> %i, %tmp
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
ret <8 x double> %r
}
; Zero-masking with a folded load, 512-bit: the loaded addend is expected as
; the (%rdi) memory operand of a vaddpd that carries {%k} and {z}.
; CHECK-LABEL: test_maskz_fold_vaddpd
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load <8 x double>* %j, align 8
%x = fadd <8 x double> %i, %tmp
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}
; Unmasked embedded broadcast, 512-bit: a scalar double is loaded and splatted
; (insertelement + zero-index shufflevector) before the fadd. The pattern
; expects the splat to be folded into vaddpd as a (%rdi){1to8} operand.
; CHECK-LABEL: test_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
; CHECK: ret
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
%tmp = load double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
%x = fadd <8 x double> %c, %i
ret <8 x double> %x
}
; Merge-masking with an embedded broadcast, 512-bit: the splatted scalar load
; is expected as a (%rdi){1to8} operand of a {%k}-masked vaddpd.
; NOTE(review): %dst is never used; the pass-through value of the select is %i,
; so the masked-off lanes keep the first addend. Confirm this is intentional.
; CHECK-LABEL: test_mask_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
double* %j, <8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
%x = fadd <8 x double> %c, %i
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
ret <8 x double> %r
}
; Zero-masking with an embedded broadcast, 512-bit: the splatted scalar load is
; expected as a (%rdi){1to8} operand of a vaddpd that carries {%k} and {z}.
; CHECK-LABEL: test_maskz_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
%x = fadd <8 x double> %c, %i
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}

View File

@ -149,6 +149,258 @@ define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
ret <8 x i32> %x
}
; Unmasked 256-bit double precision add of two register arguments. The pattern
; only requires that some vaddpd is emitted before the return.
; CHECK-LABEL: test_vaddpd_256
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
entry:
%add.i = fadd <4 x double> %x, %y
ret <4 x double> %add.i
}
; 256-bit double precision add with a non-splat constant vector. The pattern
; expects the constant to be referenced directly by vaddpd as a RIP-relative
; LCP... memory operand.
; CHECK-LABEL: test_fold_vaddpd_256
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
entry:
%add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
ret <4 x double> %add.i
}
; 256-bit add of a splat constant. Despite the "vaddpd" in its name, this
; function operates on <8 x float>, so the instruction under test is vaddps
; with the constant folded as a {1to8} embedded broadcast. The mnemonic is
; pinned so that no other instruction with matching operands can satisfy the
; pattern.
; CHECK-LABEL: test_broadcast_vaddpd_256
; CHECK: vaddps LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK: ret
define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
%b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
ret <8 x float> %b
}
; Merge-masking, 256-bit single precision add: select(%mask, fadd(%i, %j), %dst)
; is expected to become a vaddps on ymm registers carrying a {%k} write-mask.
; CHECK-LABEL: test_mask_vaddps_256
; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
<8 x float> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%x = fadd <8 x float> %i, %j
%r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
ret <8 x float> %r
}
; Merge-masking, 256-bit single precision multiply: the fmul result is blended
; with %dst under %mask; a vmulps on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmulps_256
; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
<8 x float> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%x = fmul <8 x float> %i, %j
%r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
ret <8 x float> %r
}
; Merge-masking, 256-bit single precision minimum (fcmp olt + select), blended
; with %dst under %mask. A vminps on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vminps_256
; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
<8 x float> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%cmp_res = fcmp olt <8 x float> %i, %j
%min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
%r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
ret <8 x float> %r
}
; Merge-masking, 256-bit single precision maximum (fcmp ogt + select), blended
; with %dst under %mask. A vmaxps on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmaxps_256
; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
<8 x float> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <8 x float> %i, %j
%max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
%r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
ret <8 x float> %r
}
; Merge-masking, 256-bit single precision subtract: the fsub result is blended
; with %dst under %mask; a vsubps on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vsubps_256
; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
<8 x float> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%x = fsub <8 x float> %i, %j
%r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
ret <8 x float> %r
}
; Merge-masking, 256-bit single precision divide: the fdiv result is blended
; with %dst under %mask; a vdivps on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vdivps_256
; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
<8 x float> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%x = fdiv <8 x float> %i, %j
%r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
ret <8 x float> %r
}
; Merge-masking, 256-bit double precision multiply: the fmul result is blended
; with %dst under %mask; a vmulpd on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmulpd_256
; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
<4 x double> %j, <4 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%x = fmul <4 x double> %i, %j
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
ret <4 x double> %r
}
; Merge-masking, 256-bit double precision minimum (fcmp olt + select), blended
; with %dst under %mask. A vminpd on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vminpd_256
; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
<4 x double> %j, <4 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%cmp_res = fcmp olt <4 x double> %i, %j
%min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
%r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
ret <4 x double> %r
}
; Merge-masking, 256-bit double precision maximum (fcmp ogt + select), blended
; with %dst under %mask. A vmaxpd on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmaxpd_256
; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
<4 x double> %j, <4 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%cmp_res = fcmp ogt <4 x double> %i, %j
%max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
%r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
ret <4 x double> %r
}
; Merge-masking, 256-bit double precision subtract: the fsub result is blended
; with %dst under %mask; a vsubpd on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vsubpd_256
; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
<4 x double> %j, <4 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%x = fsub <4 x double> %i, %j
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
ret <4 x double> %r
}
; Merge-masking, 256-bit double precision divide: the fdiv result is blended
; with %dst under %mask; a vdivpd on ymm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vdivpd_256
; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
<4 x double> %j, <4 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%x = fdiv <4 x double> %i, %j
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
ret <4 x double> %r
}
; Merge-masking, 256-bit double precision add: select(%mask, fadd(%i, %j), %dst)
; is expected to become a vaddpd on ymm registers carrying a {%k} write-mask.
; CHECK-LABEL: test_mask_vaddpd_256
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
<4 x double> %j, <4 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%x = fadd <4 x double> %i, %j
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
ret <4 x double> %r
}
; Zero-masking, 256-bit double precision add: lanes not selected by %mask take
; zeroinitializer, so the vaddpd is expected to carry both {%k} and {z}.
; CHECK-LABEL: test_maskz_vaddpd_256
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
<4 x i64> %mask1) nounwind readnone {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%x = fadd <4 x double> %i, %j
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
ret <4 x double> %r
}
; Merge-masking with a folded load, 256-bit: the addend loaded from %j is
; expected to appear as the (%rdi) memory operand of a {%k}-masked vaddpd.
; CHECK-LABEL: test_mask_fold_vaddpd_256
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
<4 x double>* %j, <4 x i64> %mask1)
nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%tmp = load <4 x double>* %j
%x = fadd <4 x double> %i, %tmp
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
ret <4 x double> %r
}
; Zero-masking with a folded load, 256-bit: the loaded addend is expected as
; the (%rdi) memory operand of a vaddpd that carries {%k} and {z}.
; CHECK-LABEL: test_maskz_fold_vaddpd_256
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
<4 x i64> %mask1) nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%tmp = load <4 x double>* %j
%x = fadd <4 x double> %i, %tmp
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
ret <4 x double> %r
}
; Unmasked embedded broadcast, 256-bit: a scalar double is loaded and splatted
; (insertelement + zero-index shufflevector) before the fadd. The pattern
; expects the splat to be folded into vaddpd as a (%rdi){1to4} operand.
; CHECK-LABEL: test_broadcast2_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
%tmp = load double* %j
%b = insertelement <4 x double> undef, double %tmp, i32 0
%c = shufflevector <4 x double> %b, <4 x double> undef,
<4 x i32> zeroinitializer
%x = fadd <4 x double> %c, %i
ret <4 x double> %x
}
; Merge-masking with an embedded broadcast, 256-bit: the splatted scalar load
; is expected as a (%rdi){1to4} operand of a {%k}-masked vaddpd.
; NOTE(review): %dst is never used; the pass-through value of the select is %i,
; so the masked-off lanes keep the first addend. Confirm this is intentional.
; CHECK-LABEL: test_mask_broadcast_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
double* %j, <4 x i64> %mask1) nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%tmp = load double* %j
%b = insertelement <4 x double> undef, double %tmp, i32 0
%c = shufflevector <4 x double> %b, <4 x double> undef,
<4 x i32> zeroinitializer
%x = fadd <4 x double> %c, %i
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
ret <4 x double> %r
}
; Zero-masking with an embedded broadcast, 256-bit: the splatted scalar load is
; expected as a (%rdi){1to4} operand of a vaddpd that carries {%k} and {z}.
; CHECK-LABEL: test_maskz_broadcast_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
<4 x i64> %mask1) nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%tmp = load double* %j
%b = insertelement <4 x double> undef, double %tmp, i32 0
%c = shufflevector <4 x double> %b, <4 x double> undef,
<4 x i32> zeroinitializer
%x = fadd <4 x double> %c, %i
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
ret <4 x double> %r
}
; 128-bit
; CHECK-LABEL: vpaddq128_test
@ -289,3 +541,254 @@ define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
%x = mul <4 x i32> %i, %j
ret <4 x i32> %x
}
; Unmasked 128-bit double precision add of two register arguments. The pattern
; only requires that some vaddpd is emitted before the return.
; CHECK-LABEL: test_vaddpd_128
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
entry:
%add.i = fadd <2 x double> %x, %y
ret <2 x double> %add.i
}
; 128-bit double precision add with a non-splat constant vector. The pattern
; expects the constant to be referenced directly by vaddpd as a RIP-relative
; LCP... memory operand.
; CHECK-LABEL: test_fold_vaddpd_128
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
entry:
%add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
ret <2 x double> %add.i
}
; 128-bit add of a splat constant. Despite the "vaddpd" in its name, this
; function operates on <4 x float>, so the instruction under test is vaddps
; with the constant folded as a {1to4} embedded broadcast. The mnemonic is
; pinned so that no other instruction with matching operands can satisfy the
; pattern.
; CHECK-LABEL: test_broadcast_vaddpd_128
; CHECK: vaddps LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK: ret
define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
%b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
ret <4 x float> %b
}
; Merge-masking, 128-bit single precision add: select(%mask, fadd(%i, %j), %dst)
; is expected to become a vaddps on xmm registers carrying a {%k} write-mask.
; CHECK-LABEL: test_mask_vaddps_128
; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
<4 x float> %j, <4 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%x = fadd <4 x float> %i, %j
%r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
ret <4 x float> %r
}
; Merge-masking, 128-bit single precision multiply: the fmul result is blended
; with %dst under %mask; a vmulps on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmulps_128
; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
<4 x float> %j, <4 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%x = fmul <4 x float> %i, %j
%r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
ret <4 x float> %r
}
; Merge-masking, 128-bit single precision minimum (fcmp olt + select), blended
; with %dst under %mask. A vminps on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vminps_128
; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
<4 x float> %j, <4 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%cmp_res = fcmp olt <4 x float> %i, %j
%min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
%r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
ret <4 x float> %r
}
; Merge-masking, 128-bit single precision maximum (fcmp ogt + select), blended
; with %dst under %mask. A vmaxps on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmaxps_128
; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
<4 x float> %j, <4 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <4 x float> %i, %j
%max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
%r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
ret <4 x float> %r
}
; Merge-masking, 128-bit single precision subtract: the fsub result is blended
; with %dst under %mask; a vsubps on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vsubps_128
; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
<4 x float> %j, <4 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%x = fsub <4 x float> %i, %j
%r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
ret <4 x float> %r
}
; Merge-masking, 128-bit single precision divide: the fdiv result is blended
; with %dst under %mask; a vdivps on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vdivps_128
; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
<4 x float> %j, <4 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%x = fdiv <4 x float> %i, %j
%r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
ret <4 x float> %r
}
; Merge-masking, 128-bit double precision multiply: the fmul result is blended
; with %dst under %mask; a vmulpd on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmulpd_128
; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
<2 x double> %j, <2 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%x = fmul <2 x double> %i, %j
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
ret <2 x double> %r
}
; Merge-masking, 128-bit double precision minimum (fcmp olt + select), blended
; with %dst under %mask. A vminpd on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vminpd_128
; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
<2 x double> %j, <2 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%cmp_res = fcmp olt <2 x double> %i, %j
%min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
%r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
ret <2 x double> %r
}
; Merge-masking, 128-bit double precision maximum (fcmp ogt + select), blended
; with %dst under %mask. A vmaxpd on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vmaxpd_128
; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
<2 x double> %j, <2 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%cmp_res = fcmp ogt <2 x double> %i, %j
%max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
%r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
ret <2 x double> %r
}
; Merge-masking, 128-bit double precision subtract: the fsub result is blended
; with %dst under %mask; a vsubpd on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vsubpd_128
; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
<2 x double> %j, <2 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%x = fsub <2 x double> %i, %j
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
ret <2 x double> %r
}
; Merge-masking, 128-bit double precision divide: the fdiv result is blended
; with %dst under %mask; a vdivpd on xmm registers with a {%k} mask is expected.
; CHECK-LABEL: test_mask_vdivpd_128
; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
<2 x double> %j, <2 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%x = fdiv <2 x double> %i, %j
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
ret <2 x double> %r
}
; Merge-masking, 128-bit double precision add: select(%mask, fadd(%i, %j), %dst)
; is expected to become a vaddpd on xmm registers carrying a {%k} write-mask.
; CHECK-LABEL: test_mask_vaddpd_128
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
<2 x double> %j, <2 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%x = fadd <2 x double> %i, %j
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
ret <2 x double> %r
}
; Zero-masking, 128-bit double precision add: lanes not selected by %mask take
; zeroinitializer, so the vaddpd is expected to carry both {%k} and {z}.
; CHECK-LABEL: test_maskz_vaddpd_128
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
<2 x i64> %mask1) nounwind readnone {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%x = fadd <2 x double> %i, %j
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
ret <2 x double> %r
}
; Merge-masking with a folded load, 128-bit: the addend loaded from %j is
; expected to appear as the (%rdi) memory operand of a {%k}-masked vaddpd.
; CHECK-LABEL: test_mask_fold_vaddpd_128
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
<2 x double>* %j, <2 x i64> %mask1)
nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%tmp = load <2 x double>* %j
%x = fadd <2 x double> %i, %tmp
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
ret <2 x double> %r
}
; Zero-masking with a folded load, 128-bit: the loaded addend is expected as
; the (%rdi) memory operand of a vaddpd that carries {%k} and {z}.
; CHECK-LABEL: test_maskz_fold_vaddpd_128
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
<2 x i64> %mask1) nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%tmp = load <2 x double>* %j
%x = fadd <2 x double> %i, %tmp
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
ret <2 x double> %r
}
; Unmasked embedded broadcast, 128-bit: the splat is built by inserting the
; same loaded scalar into both lanes. The pattern expects it to be folded into
; vaddpd as a (%rdi){1to2} operand.
; CHECK-LABEL: test_broadcast2_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
%tmp = load double* %j
%j.0 = insertelement <2 x double> undef, double %tmp, i64 0
%j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
%x = fadd <2 x double> %j.1, %i
ret <2 x double> %x
}
; Merge-masking with an embedded broadcast, 128-bit: the two-lane splat of the
; loaded scalar is expected as a (%rdi){1to2} operand of a {%k}-masked vaddpd.
; NOTE(review): %dst is never used; the pass-through value of the select is %i,
; so the masked-off lanes keep the first addend. Confirm this is intentional.
; CHECK-LABEL: test_mask_broadcast_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
double* %j, <2 x i64> %mask1)
nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%tmp = load double* %j
%j.0 = insertelement <2 x double> undef, double %tmp, i64 0
%j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
%x = fadd <2 x double> %j.1, %i
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
ret <2 x double> %r
}
; Zero-masking with an embedded broadcast, 128-bit: the two-lane splat of the
; loaded scalar is expected as a (%rdi){1to2} operand of a vaddpd that carries
; {%k} and {z}.
; CHECK-LABEL: test_maskz_broadcast_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
<2 x i64> %mask1) nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%tmp = load double* %j
%j.0 = insertelement <2 x double> undef, double %tmp, i64 0
%j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
%x = fadd <2 x double> %j.1, %i
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
ret <2 x double> %r
}