mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-12 07:37:34 +00:00
AVX-512: Enabled SSE intrinsics on AVX-512.
Predicate UseAVX deprecates pattern selection on AVX-512. This predicate is necessary for DAG selection to select the EVEX form. But mapping SSE intrinsics to AVX-512 instructions is not ready yet, so I replaced UseAVX with HasAVX for the intrinsic patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237903 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b6ea67e027
commit
86425451e5
@ -442,12 +442,29 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
asm));
|
||||
}
|
||||
|
||||
// SIi8 - SSE 1 & 2 scalar instructions
|
||||
// SI_Int - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
|
||||
class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, InstrItinClass itin = NoItinerary,
|
||||
Domain d = GenericDomain>
|
||||
: I<o, F, outs, ins, asm, pattern, itin, d> {
|
||||
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
|
||||
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
|
||||
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
|
||||
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
|
||||
[UseSSE1])))));
|
||||
|
||||
// AVX instructions have a 'v' prefix in the mnemonic
|
||||
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
|
||||
asm));
|
||||
}
|
||||
// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
|
||||
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, InstrItinClass itin = NoItinerary>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, itin> {
|
||||
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
|
||||
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
|
||||
[UseSSE2])));
|
||||
|
||||
|
@ -264,7 +264,7 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
Operand memopr, ComplexPattern mem_cpat,
|
||||
Domain d, OpndItins itins, bit Is2Addr = 1> {
|
||||
let isCodeGenOnly = 1 in {
|
||||
def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
@ -272,7 +272,7 @@ let isCodeGenOnly = 1 in {
|
||||
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
|
||||
RC:$src1, RC:$src2))], itins.rr, d>,
|
||||
Sched<[itins.Sched]>;
|
||||
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
|
||||
def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
@ -1851,14 +1851,14 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
|
||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
|
||||
IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
|
||||
IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
|
||||
Sched<[WriteCvtF2F]>;
|
||||
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
|
||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
|
||||
VR128:$src1, sse_load_f64:$src2))],
|
||||
IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
|
||||
IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
|
||||
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
@ -1936,14 +1936,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
|
||||
IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
|
||||
IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
|
||||
Sched<[WriteCvtF2F]>;
|
||||
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
|
||||
IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
|
||||
IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
|
||||
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
|
||||
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
|
||||
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
|
||||
@ -3380,7 +3380,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
X86MemOperand x86memop, Operand vec_memop,
|
||||
ComplexPattern mem_cpat,
|
||||
Intrinsic Intr, SDNode OpNode, Domain d,
|
||||
OpndItins itins, Predicate target, string Suffix> {
|
||||
OpndItins itins, string Suffix> {
|
||||
let hasSideEffects = 0 in {
|
||||
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
@ -3402,7 +3402,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [target] in {
|
||||
let Predicates = [UseAVX] in {
|
||||
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
|
||||
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
|
||||
|
||||
@ -3410,6 +3410,8 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
(!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
|
||||
mem_cpat:$src)>;
|
||||
|
||||
}
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(Intr VR128:$src),
|
||||
(!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)),
|
||||
VR128:$src)>;
|
||||
@ -3418,7 +3420,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
|
||||
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
|
||||
}
|
||||
let Predicates = [target, OptForSize] in
|
||||
let Predicates = [UseAVX, OptForSize] in
|
||||
def : Pat<(ScalarVT (OpNode (load addr:$src))),
|
||||
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
|
||||
addr:$src)>;
|
||||
@ -3505,7 +3507,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
|
||||
f32mem, ssmem, sse_load_f32,
|
||||
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
|
||||
SSEPackedSingle, itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
|
||||
SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
|
||||
}
|
||||
|
||||
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
@ -3517,7 +3519,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
|
||||
f64mem, sdmem, sse_load_f64,
|
||||
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
|
||||
OpNode, SSEPackedDouble, itins, UseAVX, "SD">,
|
||||
OpNode, SSEPackedDouble, itins, "SD">,
|
||||
XD, VEX_4V, VEX_LIG;
|
||||
}
|
||||
|
||||
@ -4980,7 +4982,7 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Store / copy lower 64-bits of a XMM register.
|
||||
//
|
||||
let Predicates = [UseAVX] in
|
||||
let Predicates = [HasAVX] in
|
||||
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
|
||||
(VMOVPQI2QImr addr:$dst, VR128:$src)>;
|
||||
let Predicates = [UseSSE2] in
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: addss
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: addsd
|
||||
@ -142,7 +143,7 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
|
||||
; CHECK: cvtsd2ss
|
||||
; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
|
||||
; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
|
||||
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: blendpd
|
||||
|
Loading…
x
Reference in New Issue
Block a user