AVX-512: added conversion instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189349 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2013-08-27 13:54:04 +00:00
parent 5d8c2e460c
commit d371d4f0f4
3 changed files with 400 additions and 19 deletions

View File

@ -2191,6 +2191,193 @@ defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X,
defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X,
f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//
multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V;
} // neverHasSideEffects = 1
}
defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp GR32:$src)),
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
(VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
(ins FR32X:$src1, FR32X:$src2),
"vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
(ins FR32X:$src1, f32mem:$src2),
"vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
EVEX_CD8<32, CD8VT1>;
// Convert scalar double to scalar single
def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
(ins FR64X:$src1, FR64X:$src2),
"vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
(ins FR64X:$src1, f64mem:$src2),
"vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, EVEX_4V, VEX_LIG, VEX_W,
Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
}
def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(fextend (loadf32 addr:$src)),
(VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
Requires<[HasAVX512]>;
multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
Domain d> {
let neverHasSideEffects = 1 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
let mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
} // neverHasSideEffects = 1
}
defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
memopv8f64, f512mem, v8f32, v8f64,
SSEPackedSingle>, EVEX_V512, VEX_W, OpSize,
EVEX_CD8<64, CD8VF>;
defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
memopv4f64, f256mem, v8f64, v8f32,
SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from sign integer to float/double
//===----------------------------------------------------------------------===//
defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
memopv8i64, i512mem, v16f32, v16i32,
SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
memopv4i64, i256mem, v8f64, v8i32,
SSEPackedDouble>, EVEX_V512, XS,
EVEX_CD8<32, CD8VH>;
defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
memopv16f32, f512mem, v16i32, v16f32,
SSEPackedSingle>, EVEX_V512, XS,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
memopv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, OpSize, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
memopv16f32, f512mem, v16i32, v16f32,
SSEPackedSingle>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
memopv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
memopv4i64, f256mem, v8f64, v8i32,
SSEPackedDouble>, EVEX_V512, XS,
EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
memopv16i32, f512mem, v16f32, v16i32,
SSEPackedSingle>, EVEX_V512, XD,
EVEX_CD8<32, CD8VF>;
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src),
(VCVTDQ2PSZrr VR512:$src)>;
def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))),
(VCVTDQ2PSZrm addr:$src)>;
def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
"vcvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR512:$dst,
(int_x86_avx512_cvt_ps2dq_512 VR512:$src))],
IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512;
def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
"vcvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR512:$dst,
(int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations // VSHUFPS - VSHUFPD Operations

View File

@ -1440,7 +1440,7 @@ let neverHasSideEffects = 1 in {
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> { X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in { let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[WriteCvtI2F]>; Sched<[WriteCvtI2F]>;
@ -1452,6 +1452,7 @@ let neverHasSideEffects = 1 in {
} // neverHasSideEffects = 1 } // neverHasSideEffects = 1
} }
let Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}", "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, SSE_CVT_SS2SI_32>,
@ -1485,7 +1486,7 @@ def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx // The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands, // register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly // provide other assembly "l" and "q" forms to address this explicitly
@ -1499,12 +1500,12 @@ defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG; XD, VEX_4V, VEX_W, VEX_LIG;
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", let Predicates = [UseAVX] in {
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>; (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
let Predicates = [HasAVX] in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@ -1606,19 +1607,21 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
} }
let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD; sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W; sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
let Predicates = [UseAVX] in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, VEX_4V; SSE_CVT_Scalar, 0>, XS, VEX_4V;
@ -1633,7 +1636,7 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD, SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W; VEX_4V, VEX_W;
}
let Constraints = "$src1 = $dst" in { let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, int_x86_sse_cvtsi2ss, i32mem, loadi32,
@ -1652,6 +1655,7 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only /// SSE 1 Only
// Aliases for intrinsics // Aliases for intrinsics
let Predicates = [UseAVX] in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si", ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS, VEX; SSE_CVT_SS2SI_32>, XS, VEX;
@ -1666,6 +1670,7 @@ defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
"cvttsd2si", SSE_CVT_SD2SI>, "cvttsd2si", SSE_CVT_SD2SI>,
XD, VEX, VEX_W; XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si", ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS; SSE_CVT_SS2SI_32>, XS;
@ -1679,13 +1684,14 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
"cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W; "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
let Predicates = [UseAVX] in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si", ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
ssmem, sse_load_f32, "cvtss2si", ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
}
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si", ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS; SSE_CVT_SS2SI_32>, XS;
@ -1707,6 +1713,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
SSEPackedSingle, SSE_CVT_PS>, SSEPackedSingle, SSE_CVT_PS>,
TB, Requires<[UseSSE2]>; TB, Requires<[UseSSE2]>;
let Predicates = [UseAVX] in {
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSS2SIrr GR32:$dst, VR128:$src), 0>; (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
@ -1723,6 +1730,7 @@ def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>; (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
}
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
(CVTSS2SIrr GR32:$dst, VR128:$src), 0>; (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
@ -1744,7 +1752,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
/// SSE 2 Only /// SSE 2 Only
// Convert scalar double to scalar single // Convert scalar double to scalar single
let neverHasSideEffects = 1 in { let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2), (ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@ -1760,7 +1768,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
} }
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>; Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}", "cvtsd2ss\t{$src, $dst|$dst, $src}",
@ -1778,14 +1786,14 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2F]>; Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))], VR128:$src1, sse_load_f64:$src2))],
IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>; Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { let Constraints = "$src1 = $dst" in {
@ -1807,7 +1815,7 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
// Convert scalar single to scalar double // Convert scalar single to scalar double
// SSE2 instructions with XS prefix // SSE2 instructions with XS prefix
let neverHasSideEffects = 1 in { let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2), (ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@ -1824,16 +1832,16 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
} }
def : Pat<(f64 (fextend FR32:$src)), def : Pat<(f64 (fextend FR32:$src)),
(VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fextend (loadf32 addr:$src)), def : Pat<(fextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src), def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX, OptForSize]>; Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src), def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
Requires<[HasAVX, OptForSpeed]>; Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}", "cvtss2sd\t{$src, $dst|$dst, $src}",
@ -1861,14 +1869,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2F]>; Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>; Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,

View File

@ -0,0 +1,186 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; CHECK-LABEL: sitof32
; CHECK: vcvtdq2ps %zmm
; CHECK: ret
define <16 x float> @sitof32(<16 x i32> %a) nounwind {
%b = sitofp <16 x i32> %a to <16 x float>
ret <16 x float> %b
}
; CHECK-LABEL: fptosi00
; CHECK: vcvttps2dq %zmm
; CHECK: ret
define <16 x i32> @fptosi00(<16 x float> %a) nounwind {
%b = fptosi <16 x float> %a to <16 x i32>
ret <16 x i32> %b
}
; CHECK-LABEL: fptoui00
; CHECK: vcvttps2udq
; CHECK: ret
define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
%b = fptoui <16 x float> %a to <16 x i32>
ret <16 x i32> %b
}
; CHECK-LABEL: fptoui01
; CHECK: vcvttpd2udq
; CHECK: ret
define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
%b = fptoui <8 x double> %a to <8 x i32>
ret <8 x i32> %b
}
; CHECK-LABEL: sitof64
; CHECK: vcvtdq2pd %ymm
; CHECK: ret
define <8 x double> @sitof64(<8 x i32> %a) {
%b = sitofp <8 x i32> %a to <8 x double>
ret <8 x double> %b
}
; CHECK-LABEL: fptosi01
; CHECK: vcvttpd2dq %zmm
; CHECK: ret
define <8 x i32> @fptosi01(<8 x double> %a) {
%b = fptosi <8 x double> %a to <8 x i32>
ret <8 x i32> %b
}
; CHECK-LABEL: fptrunc00
; CHECK: vcvtpd2ps %zmm
; CHECK-NEXT: vcvtpd2ps %zmm
; CHECK-NEXT: vinsertf64x4 $1
; CHECK: ret
define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
%a = fptrunc <16 x double> %b to <16 x float>
ret <16 x float> %a
}
; CHECK-LABEL: fpext00
; CHECK: vcvtps2pd %ymm0, %zmm0
; CHECK: ret
define <8 x double> @fpext00(<8 x float> %b) nounwind {
%a = fpext <8 x float> %b to <8 x double>
ret <8 x double> %a
}
; CHECK-LABEL: funcA
; CHECK: vcvtsi2sdqz (%
; CHECK: ret
define double @funcA(i64* nocapture %e) {
entry:
%tmp1 = load i64* %e, align 8
%conv = sitofp i64 %tmp1 to double
ret double %conv
}
; CHECK-LABEL: funcB
; CHECK: vcvtsi2sdlz (%
; CHECK: ret
define double @funcB(i32* %e) {
entry:
%tmp1 = load i32* %e, align 4
%conv = sitofp i32 %tmp1 to double
ret double %conv
}
; CHECK-LABEL: funcC
; CHECK: vcvtsi2sslz (%
; CHECK: ret
define float @funcC(i32* %e) {
entry:
%tmp1 = load i32* %e, align 4
%conv = sitofp i32 %tmp1 to float
ret float %conv
}
; CHECK-LABEL: i64tof32
; CHECK: vcvtsi2ssqz (%
; CHECK: ret
define float @i64tof32(i64* %e) {
entry:
%tmp1 = load i64* %e, align 8
%conv = sitofp i64 %tmp1 to float
ret float %conv
}
; CHECK-LABEL: fpext
; CHECK: vcvtss2sdz
; CHECK: ret
define void @fpext() {
entry:
%f = alloca float, align 4
%d = alloca double, align 8
%tmp = load float* %f, align 4
%conv = fpext float %tmp to double
store double %conv, double* %d, align 8
ret void
}
; CHECK-LABEL: fpround_scalar
; CHECK: vmovsdz
; CHECK: vcvtsd2ssz
; CHECK: vmovssz
; CHECK: ret
define void @fpround_scalar() nounwind uwtable {
entry:
%f = alloca float, align 4
%d = alloca double, align 8
%tmp = load double* %d, align 8
%conv = fptrunc double %tmp to float
store float %conv, float* %f, align 4
ret void
}
; CHECK-LABEL: long_to_double
; CHECK: vmovqz
; CHECK: ret
define double @long_to_double(i64 %x) {
%res = bitcast i64 %x to double
ret double %res
}
; CHECK-LABEL: double_to_long
; CHECK: vmovqz
; CHECK: ret
define i64 @double_to_long(double %x) {
%res = bitcast double %x to i64
ret i64 %res
}
; CHECK-LABEL: int_to_float
; CHECK: vmovdz
; CHECK: ret
define float @int_to_float(i32 %x) {
%res = bitcast i32 %x to float
ret float %res
}
; CHECK-LABEL: float_to_int
; CHECK: vmovdz
; CHECK: ret
define i32 @float_to_int(float %x) {
%res = bitcast float %x to i32
ret i32 %res
}
; CHECK-LABEL: uitof64
; CHECK: vextracti64x4
; CHECK: vcvtudq2pd
; CHECK: vcvtudq2pd
; CHECK: ret
define <16 x double> @uitof64(<16 x i32> %a) nounwind {
%b = uitofp <16 x i32> %a to <16 x double>
ret <16 x double> %b
}
; CHECK-LABEL: uitof32
; CHECK: vcvtudq2ps
; CHECK: ret
define <16 x float> @uitof32(<16 x i32> %a) nounwind {
%b = uitofp <16 x i32> %a to <16 x float>
ret <16 x float> %b
}