AVX-512: Added intrinsic for cvtph2ps.

Added VPTESTNM instruction.
Added a pattern to vselect (lit tests will follow).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200823 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2014-02-05 07:05:03 +00:00
parent 3211c86b96
commit 002683abc7
6 changed files with 85 additions and 42 deletions

View File

@ -2648,6 +2648,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@ -2756,12 +2762,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx512_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// Vector convert

View File

@ -312,8 +312,9 @@ namespace llvm {
// TESTP - Vector packed fp sign bitwise comparisons.
TESTP,
// TESTM - Vector "test" in AVX-512, the result is in a mask vector.
// TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
TESTM,
TESTNM,
// OR/AND test for masks
KORTEST,

View File

@ -613,13 +613,13 @@ defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512me
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERM2D : avx512_perm_3src<0x7E, "vperm2d", VR512, memopv16i32, i512mem,
defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem,
X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERM2Q : avx512_perm_3src<0x7E, "vperm2q", VR512, memopv8i64, i512mem,
defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem,
X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERM2PS : avx512_perm_3src<0x7F, "vperm2ps", VR512, memopv16f32, i512mem,
defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem,
X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERM2PD : avx512_perm_3src<0x7F, "vperm2pd", VR512, memopv8f64, i512mem,
defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
@ -1332,6 +1332,11 @@ let Constraints = "$src1 = $dst" in {
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
}
def rrkz : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src),
!strconcat(asm,
" \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), []>,
EVEX, EVEX_KZ;
}
defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
@ -1351,6 +1356,23 @@ def : Pat<(store (v16i32 VR512:$src), addr:$dst),
(VMOVDQU32mr addr:$dst, VR512:$src)>;
let AddedComplexity = 20 in {
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
(bc_v8i64 (v16i32 immAllZerosV)))),
(VMOVDQU64rrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQU64rrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
(v16i32 immAllZerosV))),
(VMOVDQU32rrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQU32rrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
(v16f32 VR512:$src2))),
(VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
@ -2118,24 +2140,34 @@ def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
SDNode OpNode, ValueType vt> {
def rr : AVX5128I<opc, MRMSrcReg,
def rr : AVX512PI<opc, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
def rm : AVX5128I<opc, MRMSrcMem,
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
SSEPackedInt>, EVEX_4V;
def rm : AVX512PI<opc, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1),
(bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
(bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
}
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
memopv16i32, X86testm, v16i32>, EVEX_V512,
memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
memopv8i64, X86testm, v8i64>, T8XS, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasCDI] in {
defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
memopv8i64, X86testnm, v8i64>, T8PD, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
}
def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (i16 -1))),
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
@ -2997,35 +3029,41 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
X86MemOperand x86memop, Intrinsic Int> {
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
X86MemOperand x86memop> {
def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
[]>, EVEX;
let hasSideEffects = 0, mayLoad = 1 in
def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
}
multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
X86MemOperand x86memop, Intrinsic Int> {
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
X86MemOperand x86memop> {
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
(ins srcRC:$src1, i32i8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
"vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, EVEX;
let hasSideEffects = 0, mayStore = 1 in
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
"vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
}
defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
(VCVTPS2PHZrr VR512:$src, imm:$rc)>;
def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
(bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
(VCVTPH2PSZrr VR256X:$src)>;
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, TB, EVEX, VEX_LIG,

View File

@ -177,6 +177,9 @@ def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>>;
def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>>;
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",

View File

@ -220,19 +220,20 @@ define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
; CHECK: vcvtph2ps
%res = call <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16> %a0)
; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16>) nounwind readonly
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
; CHECK: vcvtps2ph
%res = call <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float> %a0, i32 0)
; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
%res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float>, i32) nounwind readonly
declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
; CHECK: vbroadcastss

View File

@ -49,7 +49,7 @@ define <8 x double> @test4(<8 x double> %a) nounwind {
}
; CHECK-LABEL: test5:
; CHECK: vperm2pd
; CHECK: vpermt2pd
; CHECK: ret
define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
%c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
@ -65,7 +65,7 @@ define <8 x i64> @test6(<8 x i64> %a) nounwind {
}
; CHECK-LABEL: test7:
; CHECK: vperm2q
; CHECK: vpermt2q
; CHECK: ret
define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
%c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
@ -73,7 +73,7 @@ define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
}
; CHECK-LABEL: test8:
; CHECK: vperm2d
; CHECK: vpermt2d
; CHECK: ret
define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
@ -81,7 +81,7 @@ define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
}
; CHECK-LABEL: test9:
; CHECK: vperm2ps
; CHECK: vpermt2ps
; CHECK: ret
define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
%c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
@ -89,7 +89,7 @@ define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
}
; CHECK-LABEL: test10:
; CHECK: vperm2ps (
; CHECK: vpermt2ps (
; CHECK: ret
define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
%c = load <16 x float>* %b
@ -98,7 +98,7 @@ define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
}
; CHECK-LABEL: test11:
; CHECK: vperm2d
; CHECK: vpermt2d
; CHECK: ret
define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
%c = load <16 x i32>* %b
@ -202,7 +202,7 @@ define <16 x float> @test23(<16 x float> %a, <16 x float> %c) {
}
; CHECK-LABEL: @test24
; CHECK: vperm2d
; CHECK: vpermt2d
; CHECK: ret
define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>