mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-19 06:31:18 +00:00
AVX-512: added scalar convert instructions and intrinsics.
Fixed load folding in VPERM2I instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192063 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
26ba5df2eb
commit
714319a169
@ -206,6 +206,22 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">,
|
def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">,
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||||
llvm_i64_ty], [IntrNoMem]>;
|
llvm_i64_ty], [IntrNoMem]>;
|
||||||
|
// avx-512 for unsigned conversion
|
||||||
|
def int_x86_avx512_cvtss2usi : GCCBuiltin<"__builtin_ia32_cvtss2usi">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">,
|
||||||
|
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">,
|
||||||
|
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss">,
|
||||||
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||||
|
llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi642ss">,
|
||||||
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||||
|
llvm_i64_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
|
def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
|
||||||
Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
|
def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
|
||||||
@ -484,6 +500,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">,
|
def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">,
|
||||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||||
llvm_i64_ty], [IntrNoMem]>;
|
llvm_i64_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">,
|
||||||
|
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">,
|
||||||
|
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd">,
|
||||||
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||||
|
llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
|
||||||
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||||
|
llvm_i64_ty], [IntrNoMem]>;
|
||||||
def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
|
def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||||
llvm_v2f64_ty], [IntrNoMem]>;
|
llvm_v2f64_ty], [IntrNoMem]>;
|
||||||
|
@ -1330,7 +1330,16 @@ void X86TargetLowering::resetOperationActions() {
|
|||||||
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
|
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
|
||||||
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
|
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
|
||||||
|
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
|
||||||
|
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
|
||||||
|
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
|
||||||
|
if (Subtarget->is64Bit()) {
|
||||||
|
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
|
||||||
|
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
|
||||||
|
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
|
||||||
|
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
|
||||||
|
}
|
||||||
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
|
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
|
||||||
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
|
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
|
||||||
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
|
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
|
||||||
|
@ -2215,42 +2215,208 @@ multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
|||||||
X86MemOperand x86memop, string asm> {
|
X86MemOperand x86memop, string asm> {
|
||||||
let neverHasSideEffects = 1 in {
|
let neverHasSideEffects = 1 in {
|
||||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
|
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
|
||||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V;
|
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
|
||||||
|
EVEX_4V;
|
||||||
let mayLoad = 1 in
|
let mayLoad = 1 in
|
||||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
|
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
|
||||||
(ins DstRC:$src1, x86memop:$src),
|
(ins DstRC:$src1, x86memop:$src),
|
||||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V;
|
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
|
||||||
|
EVEX_4V;
|
||||||
} // neverHasSideEffects = 1
|
} // neverHasSideEffects = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
|
defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
|
||||||
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||||
defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
|
defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
|
||||||
XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||||
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
|
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
|
||||||
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||||
defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
|
defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
|
||||||
XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||||
|
|
||||||
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
|
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
|
||||||
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
|
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
|
||||||
(VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
(VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
|
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
|
||||||
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
|
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
|
||||||
(VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
(VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
|
|
||||||
def : Pat<(f32 (sint_to_fp GR32:$src)),
|
def : Pat<(f32 (sint_to_fp GR32:$src)),
|
||||||
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
|
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
|
||||||
def : Pat<(f32 (sint_to_fp GR64:$src)),
|
def : Pat<(f32 (sint_to_fp GR64:$src)),
|
||||||
(VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
|
(VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
|
||||||
def : Pat<(f64 (sint_to_fp GR32:$src)),
|
def : Pat<(f64 (sint_to_fp GR32:$src)),
|
||||||
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
|
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
|
||||||
def : Pat<(f64 (sint_to_fp GR64:$src)),
|
def : Pat<(f64 (sint_to_fp GR64:$src)),
|
||||||
(VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
|
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
|
||||||
|
|
||||||
|
defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">,
|
||||||
|
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">,
|
||||||
|
XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">,
|
||||||
|
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">,
|
||||||
|
XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||||
|
|
||||||
|
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
|
||||||
|
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
|
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
|
||||||
|
(VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
|
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
|
||||||
|
(VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
|
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
|
||||||
|
(VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(f32 (uint_to_fp GR32:$src)),
|
||||||
|
(VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
|
||||||
|
def : Pat<(f32 (uint_to_fp GR64:$src)),
|
||||||
|
(VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
|
||||||
|
def : Pat<(f64 (uint_to_fp GR32:$src)),
|
||||||
|
(VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
|
||||||
|
def : Pat<(f64 (uint_to_fp GR64:$src)),
|
||||||
|
(VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
|
||||||
|
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AVX-512 Scalar convert from float/double to integer
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||||
|
Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
|
||||||
|
string asm> {
|
||||||
|
let neverHasSideEffects = 1 in {
|
||||||
|
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
|
||||||
|
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
|
||||||
|
[(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
|
||||||
|
!strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
|
||||||
|
} // neverHasSideEffects = 1
|
||||||
|
}
|
||||||
|
let Predicates = [HasAVX512] in {
|
||||||
|
// Convert float/double to signed/unsigned int 32/64
|
||||||
|
defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
|
||||||
|
ssmem, sse_load_f32, "cvtss2si{z}">,
|
||||||
|
XS, EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
|
||||||
|
ssmem, sse_load_f32, "cvtss2si{z}">,
|
||||||
|
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
|
||||||
|
ssmem, sse_load_f32, "cvtss2usi{z}">,
|
||||||
|
XS, EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
|
||||||
|
int_x86_avx512_cvtss2usi64, ssmem,
|
||||||
|
sse_load_f32, "cvtss2usi{z}">, XS, VEX_W,
|
||||||
|
EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
|
||||||
|
sdmem, sse_load_f64, "cvtsd2si{z}">,
|
||||||
|
XD, EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
|
||||||
|
sdmem, sse_load_f64, "cvtsd2si{z}">,
|
||||||
|
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
|
||||||
|
sdmem, sse_load_f64, "cvtsd2usi{z}">,
|
||||||
|
XD, EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
|
||||||
|
int_x86_avx512_cvtsd2usi64, sdmem,
|
||||||
|
sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
|
||||||
|
defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
|
||||||
|
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XS, EVEX_4V;
|
||||||
|
defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
|
||||||
|
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
|
||||||
|
defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
|
||||||
|
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XD, EVEX_4V;
|
||||||
|
defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
|
||||||
|
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
|
||||||
|
|
||||||
|
// Intrinsic forms of the unsigned integer -> float/double scalar converts.
|
||||||
|
// These use opcode 0x7B, the same as the pattern-less VCVTUSI2SSZ/VCVTUSI2SDZ
|
||||||
|
// definitions; 0x2A is the signed CVTSI2SS/CVTSI2SD opcode.
|
||||||
|
defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
|
||||||
|
int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XS, EVEX_4V;
|
||||||
|
defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
|
||||||
|
int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
|
||||||
|
defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
|
||||||
|
int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XD, EVEX_4V;
|
||||||
|
defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
|
||||||
|
int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}",
|
||||||
|
SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
|
||||||
|
|
||||||
|
// Convert float/double to signed/unsigned int 32/64 with truncation
|
||||||
|
defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
|
||||||
|
ssmem, sse_load_f32, "cvttss2si{z}">,
|
||||||
|
XS, EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
|
||||||
|
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
|
||||||
|
"cvttss2si{z}">, XS, VEX_W,
|
||||||
|
EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
|
||||||
|
sdmem, sse_load_f64, "cvttsd2si{z}">, XD,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
|
||||||
|
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
|
||||||
|
"cvttsd2si{z}">, XD, VEX_W,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
// Truncating float -> unsigned i32 convert (opcode 0x78); the mnemonic is
|
||||||
|
// cvttss2usi, matching Int_VCVTTSS2USI64Z and VCVTTSS2USIZ.
|
||||||
|
defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
|
||||||
|
int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
|
||||||
|
"cvttss2usi{z}">, XS, EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
|
||||||
|
int_x86_avx512_cvttss2usi64, ssmem,
|
||||||
|
sse_load_f32, "cvttss2usi{z}">, XS, VEX_W,
|
||||||
|
EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
|
||||||
|
int_x86_avx512_cvttsd2usi,
|
||||||
|
sdmem, sse_load_f64, "cvttsd2usi{z}">, XD,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
|
||||||
|
int_x86_avx512_cvttsd2usi64, sdmem,
|
||||||
|
sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||||
|
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
|
||||||
|
string asm> {
|
||||||
|
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
|
||||||
|
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
|
||||||
|
[(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
|
||||||
|
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
|
||||||
|
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
|
||||||
|
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
|
||||||
|
}
|
||||||
|
|
||||||
|
defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
|
||||||
|
loadf32, "cvttss2si{z}">, XS,
|
||||||
|
EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
|
||||||
|
loadf32, "cvttss2usi{z}">, XS,
|
||||||
|
EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
|
||||||
|
loadf32, "cvttss2si{z}">, XS, VEX_W,
|
||||||
|
EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
|
||||||
|
loadf32, "cvttss2usi{z}">, XS, VEX_W,
|
||||||
|
EVEX_CD8<32, CD8VT1>;
|
||||||
|
defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
|
||||||
|
loadf64, "cvttsd2si{z}">, XD,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
|
||||||
|
loadf64, "cvttsd2usi{z}">, XD,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
|
||||||
|
loadf64, "cvttsd2si{z}">, XD, VEX_W,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
|
defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
|
||||||
|
loadf64, "cvttsd2usi{z}">, XD, VEX_W,
|
||||||
|
EVEX_CD8<64, CD8VT1>;
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// AVX-512 Convert from float to double and back
|
// AVX-512 Convert from float to double and back
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -1232,10 +1232,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
|||||||
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
|
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
|
||||||
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
|
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
|
||||||
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
|
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
|
||||||
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
|
|
||||||
{ X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
|
|
||||||
{ X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
|
|
||||||
{ X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
|
|
||||||
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
|
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
|
||||||
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
|
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
|
||||||
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
|
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
|
||||||
@ -1425,6 +1421,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
|||||||
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
|
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
|
||||||
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
|
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
|
||||||
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
|
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
|
||||||
|
// AVX-512 VPERMI instructions with 3 source operands.
|
||||||
|
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
|
||||||
|
{ X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
|
||||||
|
{ X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
|
||||||
|
{ X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
|
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
|
||||||
|
@ -184,3 +184,34 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind {
|
|||||||
ret <16 x float> %b
|
ret <16 x float> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @fptosi02
|
||||||
|
; CHECK: vcvttss2siz
|
||||||
|
; CHECK: ret
|
||||||
|
define i32 @fptosi02(float %a) nounwind {
|
||||||
|
%b = fptosi float %a to i32
|
||||||
|
ret i32 %b
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @fptoui02
|
||||||
|
; CHECK: vcvttss2usiz
|
||||||
|
; CHECK: ret
|
||||||
|
define i32 @fptoui02(float %a) nounwind {
|
||||||
|
%b = fptoui float %a to i32
|
||||||
|
ret i32 %b
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @uitofp02
|
||||||
|
; CHECK: vcvtusi2ss
|
||||||
|
; CHECK: ret
|
||||||
|
define float @uitofp02(i32 %a) nounwind {
|
||||||
|
%b = uitofp i32 %a to float
|
||||||
|
ret float %b
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @uitofp03
|
||||||
|
; CHECK: vcvtusi2sd
|
||||||
|
; CHECK: ret
|
||||||
|
define double @uitofp03(i32 %a) nounwind {
|
||||||
|
%b = uitofp i32 %a to double
|
||||||
|
ret double %b
|
||||||
|
}
|
||||||
|
@ -86,3 +86,61 @@ define <2 x double> @test_x86_avx3_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
|
|||||||
}
|
}
|
||||||
declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
|
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
|
||||||
|
; CHECK: vcvtsd2siz
|
||||||
|
%res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
|
||||||
|
; CHECK: vcvtsi2sdqz
|
||||||
|
%res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
|
||||||
|
ret <2 x double> %res
|
||||||
|
}
|
||||||
|
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
|
||||||
|
; CHECK: vcvtusi2sdqz
|
||||||
|
%res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
|
||||||
|
ret <2 x double> %res
|
||||||
|
}
|
||||||
|
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone
|
||||||
|
|
||||||
|
define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
|
||||||
|
; CHECK: vcvttsd2siz
|
||||||
|
%res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
|
||||||
|
; CHECK: vcvtss2siz
|
||||||
|
%res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
|
||||||
|
; CHECK: vcvtsi2ssqz
|
||||||
|
%res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
|
||||||
|
; CHECK: vcvttss2siz
|
||||||
|
%res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
|
||||||
|
; CHECK: vcvtsd2usiz
|
||||||
|
%res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
|
||||||
|
ret i64 %res
|
||||||
|
}
|
||||||
|
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
|
||||||
|
Loading…
x
Reference in New Issue
Block a user