AVX-512: Floating point conversions for SKX - DAG Lowering.

SKX supports conversions for all FP types. The supported integer types include doublewords and quadwords.
I marked these nodes as "Legal" and added a bunch of tests.
I added the "NoVLX" predicate to the AVX DAG selection patterns to force selection of VLX instructions when VLX is supported.

Differential Revision: http://reviews.llvm.org/D11255



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242637 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2015-07-19 10:17:33 +00:00
parent cb29c8d9cf
commit 5683b550b2
3 changed files with 138 additions and 12 deletions

View File

@ -1354,6 +1354,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget->hasDQI()) { if (Subtarget->hasDQI()) {
setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
if (Subtarget->hasVLX()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
}
}
if (Subtarget->hasVLX()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
} }
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);

View File

@ -2073,15 +2073,17 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in { let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
(VCVTDQ2PSrr VR128:$src)>; (VCVTDQ2PSrr VR128:$src)>;
def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))), def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
(VCVTDQ2PSrm addr:$src)>; (VCVTDQ2PSrm addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>; (VCVTTPS2DQrr VR128:$src)>;
@ -2149,7 +2151,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>; (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
let Predicates = [HasAVX] in { let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>; (VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
@ -2306,7 +2308,9 @@ let Predicates = [HasAVX] in {
(VCVTDQ2PSYrr VR256:$src)>; (VCVTDQ2PSYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))), def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>; (VCVTDQ2PSYrm addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
// Match fround and fextend for 128/256-bit conversions // Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(VCVTPD2PSrr VR128:$src)>; (VCVTPD2PSrr VR128:$src)>;

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s
; CHECK-LABEL: sitof32 ; CHECK-LABEL: sitof32
; CHECK: vcvtdq2ps %zmm ; CHECK: vcvtdq2ps %zmm
@ -8,6 +8,70 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind {
ret <16 x float> %b ret <16 x float> %b
} }
; CHECK-LABEL: sltof864
; CHECK: vcvtqq2pd
define <8 x double> @sltof864(<8 x i64> %a) {
%b = sitofp <8 x i64> %a to <8 x double>
ret <8 x double> %b
}
; CHECK-LABEL: sltof464
; CHECK: vcvtqq2pd
define <4 x double> @sltof464(<4 x i64> %a) {
%b = sitofp <4 x i64> %a to <4 x double>
ret <4 x double> %b
}
; CHECK-LABEL: sltof2f32
; CHECK: vcvtqq2ps
define <2 x float> @sltof2f32(<2 x i64> %a) {
%b = sitofp <2 x i64> %a to <2 x float>
ret <2 x float>%b
}
; CHECK-LABEL: sltof4f32_mem
; CHECK: vcvtqq2psy (%rdi)
define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
%a1 = load <4 x i64>, <4 x i64>* %a, align 8
%b = sitofp <4 x i64> %a1 to <4 x float>
ret <4 x float>%b
}
; CHECK-LABEL: f64tosl
; CHECK: vcvttpd2qq
define <4 x i64> @f64tosl(<4 x double> %a) {
%b = fptosi <4 x double> %a to <4 x i64>
ret <4 x i64> %b
}
; CHECK-LABEL: f32tosl
; CHECK: vcvttps2qq
define <4 x i64> @f32tosl(<4 x float> %a) {
%b = fptosi <4 x float> %a to <4 x i64>
ret <4 x i64> %b
}
; CHECK-LABEL: sltof432
; CHECK: vcvtqq2ps
define <4 x float> @sltof432(<4 x i64> %a) {
%b = sitofp <4 x i64> %a to <4 x float>
ret <4 x float> %b
}
; CHECK-LABEL: ultof432
; CHECK: vcvtuqq2ps
define <4 x float> @ultof432(<4 x i64> %a) {
%b = uitofp <4 x i64> %a to <4 x float>
ret <4 x float> %b
}
; CHECK-LABEL: ultof64
; CHECK: vcvtuqq2pd
define <8 x double> @ultof64(<8 x i64> %a) {
%b = uitofp <8 x i64> %a to <8 x double>
ret <8 x double> %b
}
; CHECK-LABEL: fptosi00 ; CHECK-LABEL: fptosi00
; CHECK: vcvttps2dq %zmm ; CHECK: vcvttps2dq %zmm
; CHECK: ret ; CHECK: ret
@ -64,16 +128,39 @@ define <8 x i32> @fptosi01(<8 x double> %a) {
ret <8 x i32> %b ret <8 x i32> %b
} }
; CHECK-LABEL: fptosi03
; CHECK: vcvttpd2dq %ymm
; CHECK: ret
define <4 x i32> @fptosi03(<4 x double> %a) {
%b = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %b
}
; CHECK-LABEL: fptrunc00 ; CHECK-LABEL: fptrunc00
; CHECK: vcvtpd2ps %zmm ; CHECK: vcvtpd2ps %zmm
; CHECK-NEXT: vcvtpd2ps %zmm ; CHECK-NEXT: vcvtpd2ps %zmm
; CHECK-NEXT: vinsertf64x4 $1 ; CHECK-NEXT: vinsertf
; CHECK: ret ; CHECK: ret
define <16 x float> @fptrunc00(<16 x double> %b) nounwind { define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
%a = fptrunc <16 x double> %b to <16 x float> %a = fptrunc <16 x double> %b to <16 x float>
ret <16 x float> %a ret <16 x float> %a
} }
; CHECK-LABEL: fptrunc01
; CHECK: vcvtpd2ps %ymm
define <4 x float> @fptrunc01(<4 x double> %b) {
%a = fptrunc <4 x double> %b to <4 x float>
ret <4 x float> %a
}
; CHECK-LABEL: fptrunc02
; CHECK: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
%a = fptrunc <4 x double> %b to <4 x float>
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
ret <4 x float> %c
}
; CHECK-LABEL: fpext00 ; CHECK-LABEL: fpext00
; CHECK: vcvtps2pd %ymm0, %zmm0 ; CHECK: vcvtps2pd %ymm0, %zmm0
; CHECK: ret ; CHECK: ret
@ -82,6 +169,16 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind {
ret <8 x double> %a ret <8 x double> %a
} }
; CHECK-LABEL: fpext01
; CHECK: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
; CHECK: ret
define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
%a = fpext <4 x float> %b to <4 x double>
%mask = fcmp ogt <4 x double>%a1, %b1
%c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer
ret <4 x double> %c
}
; CHECK-LABEL: funcA ; CHECK-LABEL: funcA
; CHECK: vcvtsi2sdq (%rdi){{.*}} encoding: [0x62 ; CHECK: vcvtsi2sdq (%rdi){{.*}} encoding: [0x62
; CHECK: ret ; CHECK: ret
@ -257,7 +354,7 @@ define double @uitofp03(i32 %a) nounwind {
} }
; CHECK-LABEL: @sitofp_16i1_float ; CHECK-LABEL: @sitofp_16i1_float
; CHECK: vpbroadcastd ; CHECK: vpmovm2d
; CHECK: vcvtdq2ps ; CHECK: vcvtdq2ps
define <16 x float> @sitofp_16i1_float(<16 x i32> %a) { define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
%mask = icmp slt <16 x i32> %a, zeroinitializer %mask = icmp slt <16 x i32> %a, zeroinitializer
@ -301,7 +398,7 @@ define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
; CHECK-LABEL: @sitofp_8i1_double ; CHECK-LABEL: @sitofp_8i1_double
; CHECK: vpbroadcastq ; CHECK: vpmovm2d
; CHECK: vcvtdq2pd ; CHECK: vcvtdq2pd
define <8 x double> @sitofp_8i1_double(<8 x double> %a) { define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer %cmpres = fcmp ogt <8 x double> %a, zeroinitializer