diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 057d551f993..90eb7d91899 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2191,6 +2191,193 @@ defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X, defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +//===----------------------------------------------------------------------===// +// AVX-512 Scalar convert from signed integer to float/double +//===----------------------------------------------------------------------===// + +multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, + X86MemOperand x86memop, string asm> { +let neverHasSideEffects = 1 in { + def rr : SI, EVEX_4V; + let mayLoad = 1 in + def rm : SI, EVEX_4V; +} // neverHasSideEffects = 1 +} + +defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, + XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, + XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, + XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, + XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; + +def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>; + +def : Pat<(f32 (sint_to_fp GR32:$src)), + (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f32 (sint_to_fp GR64:$src)), + (VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 
+def : Pat<(f64 (sint_to_fp GR32:$src)), + (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f64 (sint_to_fp GR64:$src)), + (VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>; + + +//===----------------------------------------------------------------------===// +// AVX-512 Convert from float to double and back +//===----------------------------------------------------------------------===// +let neverHasSideEffects = 1 in { +def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), + (ins FR32X:$src1, FR32X:$src2), + "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; +let mayLoad = 1 in +def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), + (ins FR32X:$src1, f32mem:$src2), + "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + EVEX_CD8<32, CD8VT1>; + +// Convert scalar double to scalar single +def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), + (ins FR64X:$src1, FR64X:$src2), + "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; +let mayLoad = 1 in +def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), + (ins FR64X:$src1, f64mem:$src2), + "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, VEX_W, + Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; +} + +def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>, + Requires<[HasAVX512]>; +def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>; + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX512, OptForSize]>; + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, + Requires<[HasAVX512, OptForSpeed]>; + +def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, 
FR64X:$src)>, + Requires<[HasAVX512]>; + +multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, + RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT, ValueType InVT, + Domain d> { +let neverHasSideEffects = 1 in { + def rr : AVX512PI, EVEX; + let mayLoad = 1 in + def rm : AVX512PI, EVEX; +} // neverHasSideEffects = 1 +} + +defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround, + memopv8f64, f512mem, v8f32, v8f64, + SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, + EVEX_CD8<64, CD8VF>; + +defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, + memopv4f64, f256mem, v8f64, v8f32, + SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>; +def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; + +//===----------------------------------------------------------------------===// +// AVX-512 Vector convert from signed integer to float/double +//===----------------------------------------------------------------------===// + +defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, + memopv8i64, i512mem, v16f32, v16i32, + SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, + memopv4i64, i256mem, v8f64, v8i32, + SSEPackedDouble>, EVEX_V512, XS, + EVEX_CD8<32, CD8VH>; + +defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, + memopv16f32, f512mem, v16i32, v16f32, + SSEPackedSingle>, EVEX_V512, XS, + EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, + memopv8f64, f512mem, v8i32, v8f64, + SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, + EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, + memopv16f32, f512mem, v16i32, v16f32, + SSEPackedSingle>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, 
VR256X, fp_to_uint, + memopv8f64, f512mem, v8i32, v8f64, + SSEPackedDouble>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + +defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, + memopv4i64, f256mem, v8f64, v8i32, + SSEPackedDouble>, EVEX_V512, XS, + EVEX_CD8<32, CD8VH>; + +defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, + memopv16i32, f512mem, v16f32, v16i32, + SSEPackedSingle>, EVEX_V512, XD, + EVEX_CD8<32, CD8VF>; + +def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), + (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + + +def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src), + (VCVTDQ2PSZrr VR512:$src)>; +def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))), + (VCVTDQ2PSZrm addr:$src)>; + +def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), + "vcvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_cvt_ps2dq_512 VR512:$src))], + IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512; +def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src), + "vcvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + + +let Predicates = [HasAVX512] in { + def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), + (VCVTPD2PSZrm addr:$src)>; + def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; +} //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9b27e27e8de..2d7ac73bbe6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1440,7 +1440,7 @@ let neverHasSideEffects = 1 in { multiclass sse12_vcvt_avx opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> 
{ -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, Predicates = [UseAVX] in { def rr : SI, Sched<[WriteCvtI2F]>; @@ -1452,6 +1452,7 @@ let neverHasSideEffects = 1 in { } // neverHasSideEffects = 1 } +let Predicates = [UseAVX] in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, @@ -1485,7 +1486,7 @@ def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; - +} // The assembler can recognize rr 64-bit instructions by seeing a rxx // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly @@ -1499,12 +1500,12 @@ defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; -def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", +let Predicates = [UseAVX] in { + def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>; -def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", + def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; -let Predicates = [HasAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), @@ -1606,19 +1607,21 @@ multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } +let Predicates = [UseAVX] in { defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, 
int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; - +} defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W; +let Predicates = [UseAVX] in { defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, VEX_4V; @@ -1633,7 +1636,7 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W; - +} let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, @@ -1652,6 +1655,7 @@ let Constraints = "$src1 = $dst" in { /// SSE 1 Only // Aliases for intrinsics +let Predicates = [UseAVX] in { defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS, VEX; @@ -1666,6 +1670,7 @@ defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W; +} defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; @@ -1679,13 +1684,14 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W; +let Predicates = [UseAVX] in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; - +} defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, 
GR32, int_x86_sse_cvtss2si, ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_32>, XS; @@ -1707,6 +1713,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, SSEPackedSingle, SSE_CVT_PS>, TB, Requires<[UseSSE2]>; +let Predicates = [UseAVX] in { def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>; def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", @@ -1723,6 +1730,7 @@ def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>; def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; +} def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", (CVTSS2SIrr GR32:$dst, VR128:$src), 0>; @@ -1744,7 +1752,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", /// SSE 2 Only // Convert scalar double to scalar single -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, Predicates = [UseAVX] in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], @@ -1760,7 +1768,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, - Requires<[HasAVX]>; + Requires<[UseAVX]>; def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", @@ -1778,14 +1786,14 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, 
Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { @@ -1807,7 +1815,7 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, // Convert scalar single to scalar double // SSE2 instructions with XS prefix -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, Predicates = [UseAVX] in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1824,16 +1832,16 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), } def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>; def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>; def : Pat<(extloadf32 addr:$src), (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; + Requires<[UseAVX, OptForSize]>; def : Pat<(extloadf32 addr:$src), (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; + Requires<[UseAVX, OptForSpeed]>; def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1861,14 +1869,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RM>, XS, 
VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll new file mode 100644 index 00000000000..543bb5eac14 --- /dev/null +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -0,0 +1,186 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: sitof32 +; CHECK: vcvtdq2ps %zmm +; CHECK: ret +define <16 x float> @sitof32(<16 x i32> %a) nounwind { + %b = sitofp <16 x i32> %a to <16 x float> + ret <16 x float> %b +} + +; CHECK-LABEL: fptosi00 +; CHECK: vcvttps2dq %zmm +; CHECK: ret +define <16 x i32> @fptosi00(<16 x float> %a) nounwind { + %b = fptosi <16 x float> %a to <16 x i32> + ret <16 x i32> %b +} + +; CHECK-LABEL: fptoui00 +; CHECK: vcvttps2udq +; CHECK: ret +define <16 x i32> @fptoui00(<16 x float> %a) nounwind { + %b = fptoui <16 x float> %a to <16 x i32> + ret <16 x i32> %b +} + +; CHECK-LABEL: fptoui01 +; CHECK: vcvttpd2udq +; CHECK: ret +define <8 x i32> @fptoui01(<8 x double> %a) nounwind { + %b = fptoui <8 x double> %a to <8 x i32> + ret <8 x i32> %b +} + +; CHECK-LABEL: sitof64 +; CHECK: vcvtdq2pd %ymm +; CHECK: ret +define <8 x double> @sitof64(<8 x i32> %a) { + %b = sitofp <8 x i32> %a to <8 x double> + ret <8 x double> %b +} + +; CHECK-LABEL: fptosi01 +; CHECK: vcvttpd2dq %zmm +; CHECK: ret +define <8 x i32> @fptosi01(<8 x double> %a) { + %b = fptosi <8 x double> %a to <8 x i32> + ret <8 x i32> %b +} + +; CHECK-LABEL: fptrunc00 +; CHECK: vcvtpd2ps %zmm +; CHECK-NEXT: vcvtpd2ps %zmm +; CHECK-NEXT: vinsertf64x4 $1 +; CHECK: ret +define <16 x float> @fptrunc00(<16 x double> %b) nounwind { + %a = fptrunc <16 x double> %b to <16 x float> + ret <16 x float> %a +} + +; CHECK-LABEL: fpext00 +; CHECK: vcvtps2pd %ymm0, %zmm0 +; CHECK: ret +define <8 x double> @fpext00(<8 x float> %b) nounwind { + %a = fpext <8 x float> %b to <8 x double> + ret 
<8 x double> %a +} + +; CHECK-LABEL: funcA +; CHECK: vcvtsi2sdqz (% +; CHECK: ret +define double @funcA(i64* nocapture %e) { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to double + ret double %conv +} + +; CHECK-LABEL: funcB +; CHECK: vcvtsi2sdlz (% +; CHECK: ret +define double @funcB(i32* %e) { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to double + ret double %conv +} + +; CHECK-LABEL: funcC +; CHECK: vcvtsi2sslz (% +; CHECK: ret +define float @funcC(i32* %e) { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to float + ret float %conv +} + +; CHECK-LABEL: i64tof32 +; CHECK: vcvtsi2ssqz (% +; CHECK: ret +define float @i64tof32(i64* %e) { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to float + ret float %conv +} + +; CHECK-LABEL: fpext +; CHECK: vcvtss2sdz +; CHECK: ret +define void @fpext() { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load float* %f, align 4 + %conv = fpext float %tmp to double + store double %conv, double* %d, align 8 + ret void +} + +; CHECK-LABEL: fpround_scalar +; CHECK: vmovsdz +; CHECK: vcvtsd2ssz +; CHECK: vmovssz +; CHECK: ret +define void @fpround_scalar() nounwind uwtable { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load double* %d, align 8 + %conv = fptrunc double %tmp to float + store float %conv, float* %f, align 4 + ret void +} + +; CHECK-LABEL: long_to_double +; CHECK: vmovqz +; CHECK: ret +define double @long_to_double(i64 %x) { + %res = bitcast i64 %x to double + ret double %res +} + +; CHECK-LABEL: double_to_long +; CHECK: vmovqz +; CHECK: ret +define i64 @double_to_long(double %x) { + %res = bitcast double %x to i64 + ret i64 %res +} + +; CHECK-LABEL: int_to_float +; CHECK: vmovdz +; CHECK: ret +define float @int_to_float(i32 %x) { + %res = bitcast i32 %x to float + ret float %res +} + +; CHECK-LABEL: float_to_int +; CHECK: vmovdz +; CHECK: ret +define i32 @float_to_int(float 
%x) { + %res = bitcast float %x to i32 + ret i32 %res +} + +; CHECK-LABEL: uitof64 +; CHECK: vextracti64x4 +; CHECK: vcvtudq2pd +; CHECK: vcvtudq2pd +; CHECK: ret +define <16 x double> @uitof64(<16 x i32> %a) nounwind { + %b = uitofp <16 x i32> %a to <16 x double> + ret <16 x double> %b +} + +; CHECK-LABEL: uitof32 +; CHECK: vcvtudq2ps +; CHECK: ret +define <16 x float> @uitof32(<16 x i32> %a) nounwind { + %b = uitofp <16 x i32> %a to <16 x float> + ret <16 x float> %b +} +