mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-17 21:35:07 +00:00
AVX-512: added scalar convert instructions and intrinsics.
Fixed load folding in VPERMI2 instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192063 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
26ba5df2eb
commit
714319a169
@ -206,6 +206,22 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_i64_ty], [IntrNoMem]>;
|
||||
// AVX-512 scalar float <-> unsigned integer conversions
|
||||
def int_x86_avx512_cvtss2usi : GCCBuiltin<"__builtin_ia32_cvtss2usi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi642ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
|
||||
Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
|
||||
@ -484,6 +500,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_v2f64_ty], [IntrNoMem]>;
|
||||
|
@ -1330,7 +1330,16 @@ void X86TargetLowering::resetOperationActions() {
|
||||
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
|
||||
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
|
||||
|
||||
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
|
||||
if (Subtarget->is64Bit()) {
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
|
||||
}
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
|
||||
|
@ -2215,42 +2215,208 @@ multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
X86MemOperand x86memop, string asm> {
|
||||
let neverHasSideEffects = 1 in {
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
|
||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V;
|
||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
|
||||
EVEX_4V;
|
||||
let mayLoad = 1 in
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
|
||||
(ins DstRC:$src1, x86memop:$src),
|
||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V;
|
||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
|
||||
EVEX_4V;
|
||||
} // neverHasSideEffects = 1
|
||||
}
|
||||
|
||||
defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
|
||||
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
|
||||
defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
|
||||
XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
|
||||
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
|
||||
defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
|
||||
XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
|
||||
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
|
||||
(VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||
(VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
|
||||
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
|
||||
(VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||
(VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||
|
||||
def : Pat<(f32 (sint_to_fp GR32:$src)),
|
||||
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
|
||||
def : Pat<(f32 (sint_to_fp GR64:$src)),
|
||||
(VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
|
||||
(VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
|
||||
def : Pat<(f64 (sint_to_fp GR32:$src)),
|
||||
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
|
||||
def : Pat<(f64 (sint_to_fp GR64:$src)),
|
||||
(VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
|
||||
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
|
||||
|
||||
defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">,
|
||||
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">,
|
||||
XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">,
|
||||
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">,
|
||||
XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
|
||||
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
|
||||
(VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
|
||||
(VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
|
||||
(VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||
|
||||
def : Pat<(f32 (uint_to_fp GR32:$src)),
|
||||
(VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
|
||||
def : Pat<(f32 (uint_to_fp GR64:$src)),
|
||||
(VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
|
||||
def : Pat<(f64 (uint_to_fp GR32:$src)),
|
||||
(VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
|
||||
def : Pat<(f64 (uint_to_fp GR64:$src)),
|
||||
(VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 Scalar convert from float/double to integer
|
||||
//===----------------------------------------------------------------------===//
|
||||
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
|
||||
string asm> {
|
||||
let neverHasSideEffects = 1 in {
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
|
||||
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
|
||||
[(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
|
||||
let mayLoad = 1 in
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
|
||||
!strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
|
||||
} // neverHasSideEffects = 1
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
// Convert float/double to signed/unsigned int 32/64
|
||||
defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
|
||||
ssmem, sse_load_f32, "cvtss2si{z}">,
|
||||
XS, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
|
||||
ssmem, sse_load_f32, "cvtss2si{z}">,
|
||||
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
|
||||
ssmem, sse_load_f32, "cvtss2usi{z}">,
|
||||
XS, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
|
||||
int_x86_avx512_cvtss2usi64, ssmem,
|
||||
sse_load_f32, "cvtss2usi{z}">, XS, VEX_W,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
|
||||
sdmem, sse_load_f64, "cvtsd2si{z}">,
|
||||
XD, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
|
||||
sdmem, sse_load_f64, "cvtsd2si{z}">,
|
||||
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
|
||||
sdmem, sse_load_f64, "cvtsd2usi{z}">,
|
||||
XD, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
|
||||
int_x86_avx512_cvtsd2usi64, sdmem,
|
||||
sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
|
||||
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}{z}",
|
||||
SSE_CVT_Scalar, 0>, XS, EVEX_4V;
|
||||
defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
|
||||
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}",
|
||||
SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
|
||||
defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
|
||||
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}",
|
||||
SSE_CVT_Scalar, 0>, XD, EVEX_4V;
|
||||
defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
|
||||
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}",
|
||||
SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
|
||||
|
||||
// Intrinsic (VR128X) forms of the unsigned integer -> scalar FP conversions.
// These must use opcode 0x7B, matching the FR32X/FR64X VCVTUSI2SSZ/VCVTUSI2SDZ
// definitions; 0x2A is the opcode of the signed cvtsi2ss/cvtsi2sd forms and
// would make the unsigned intrinsics encode as signed conversions.
defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
                  int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}",
                  SSE_CVT_Scalar, 0>, XS, EVEX_4V;
defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
                  int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}",
                  SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
                  int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}",
                  SSE_CVT_Scalar, 0>, XD, EVEX_4V;
defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
                  int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}",
                  SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
|
||||
|
||||
// Convert float/double to signed/unsigned int 32/64 with truncation
|
||||
defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
|
||||
ssmem, sse_load_f32, "cvttss2si{z}">,
|
||||
XS, EVEX_CD8<32, CD8VT1>;
|
||||
defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
|
||||
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
|
||||
"cvttss2si{z}">, XS, VEX_W,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
|
||||
sdmem, sse_load_f64, "cvttsd2si{z}">, XD,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
|
||||
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
|
||||
"cvttsd2si{z}">, XD, VEX_W,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
// Truncating scalar float -> unsigned i32 conversion (intrinsic form).
// The mnemonic is cvttss2usi, as in the 64-bit sibling and in VCVTTSS2USIZ;
// "cvttss2si" is the signed instruction's mnemonic.
defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
                  int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
                  "cvttss2usi{z}">, XS, EVEX_CD8<32, CD8VT1>;
|
||||
defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
|
||||
int_x86_avx512_cvttss2usi64, ssmem,
|
||||
sse_load_f32, "cvttss2usi{z}">, XS, VEX_W,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
|
||||
int_x86_avx512_cvttsd2usi,
|
||||
sdmem, sse_load_f64, "cvttsd2usi{z}">, XD,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
|
||||
int_x86_avx512_cvttsd2usi64, sdmem,
|
||||
sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
|
||||
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
|
||||
string asm> {
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
|
||||
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
|
||||
[(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
|
||||
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
|
||||
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
|
||||
}
|
||||
|
||||
defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
|
||||
loadf32, "cvttss2si{z}">, XS,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
|
||||
loadf32, "cvttss2usi{z}">, XS,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
|
||||
loadf32, "cvttss2si{z}">, XS, VEX_W,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
|
||||
loadf32, "cvttss2usi{z}">, XS, VEX_W,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
|
||||
loadf64, "cvttsd2si{z}">, XD,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
|
||||
loadf64, "cvttsd2usi{z}">, XD,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
|
||||
loadf64, "cvttsd2si{z}">, XD, VEX_W,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
|
||||
loadf64, "cvttsd2usi{z}">, XD, VEX_W,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 Convert from float to double and back
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1232,10 +1232,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
|
||||
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
|
||||
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
|
||||
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
|
||||
{ X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
|
||||
{ X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
|
||||
{ X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
|
||||
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
|
||||
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
|
||||
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
|
||||
@ -1425,6 +1421,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
|
||||
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
|
||||
// AVX-512 VPERMI instructions with 3 source operands.
|
||||
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
|
||||
{ X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
|
||||
{ X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
|
||||
{ X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
|
||||
};
|
||||
|
||||
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
|
||||
|
@ -184,3 +184,34 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind {
|
||||
ret <16 x float> %b
|
||||
}
|
||||
|
||||
; Scalar float -> signed i32 conversion.
; Note: FileCheck only honors directives written as "CHECK:"; without the
; colon the mnemonic check is silently ignored.
; CHECK-LABEL: @fptosi02
; CHECK: vcvttss2siz
; CHECK: ret
define i32 @fptosi02(float %a) nounwind {
  %b = fptosi float %a to i32
  ret i32 %b
}
|
||||
|
||||
; Scalar float -> unsigned i32 conversion.
; Note: FileCheck only honors directives written as "CHECK:"; without the
; colon the mnemonic check is silently ignored.
; CHECK-LABEL: @fptoui02
; CHECK: vcvttss2usiz
; CHECK: ret
define i32 @fptoui02(float %a) nounwind {
  %b = fptoui float %a to i32
  ret i32 %b
}
|
||||
|
||||
; Unsigned i32 -> scalar float conversion.
; Note: FileCheck only honors directives written as "CHECK:"; without the
; colon the mnemonic check is silently ignored.
; CHECK-LABEL: @uitofp02
; CHECK: vcvtusi2ss
; CHECK: ret
define float @uitofp02(i32 %a) nounwind {
  %b = uitofp i32 %a to float
  ret float %b
}
|
||||
|
||||
; Unsigned i32 -> scalar double conversion.
; Note: FileCheck only honors directives written as "CHECK:"; without the
; colon the mnemonic check is silently ignored.
; CHECK-LABEL: @uitofp03
; CHECK: vcvtusi2sd
; CHECK: ret
define double @uitofp03(i32 %a) nounwind {
  %b = uitofp i32 %a to double
  ret double %b
}
|
||||
|
@ -86,3 +86,61 @@ define <2 x double> @test_x86_avx3_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
}
|
||||
declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
|
||||
; CHECK: vcvtsd2siz
|
||||
%res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
|
||||
ret i64 %res
|
||||
}
|
||||
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
|
||||
|
||||
define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
|
||||
; CHECK: vcvtsi2sdqz
|
||||
%res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
|
||||
|
||||
define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
|
||||
; CHECK: vcvtusi2sdqz
|
||||
%res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone
|
||||
|
||||
define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
|
||||
; CHECK: vcvttsd2siz
|
||||
%res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
|
||||
ret i64 %res
|
||||
}
|
||||
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
|
||||
; CHECK: vcvtss2siz
|
||||
%res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
|
||||
ret i64 %res
|
||||
}
|
||||
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
|
||||
; CHECK: vcvtsi2ssqz
|
||||
%res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
|
||||
|
||||
|
||||
define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
|
||||
; CHECK: vcvttss2siz
|
||||
%res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
|
||||
ret i64 %res
|
||||
}
|
||||
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
|
||||
|
||||
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
|
||||
; CHECK: vcvtsd2usiz
|
||||
%res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
|
||||
ret i64 %res
|
||||
}
|
||||
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
|
||||
|
Loading…
x
Reference in New Issue
Block a user