mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 21:32:39 +00:00
Simple refactoring of SSE4.1 instructions, making room for the AVX forms
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107540 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ff23535df6
commit
03560600b4
@ -3904,7 +3904,7 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
||||
(MOVUPSmr addr:$dst, VR128:$src)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE4.1 Instructions
|
||||
// SSE4.1 - Misc Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
|
||||
@ -3948,56 +3948,61 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
|
||||
string OpcodeStr,
|
||||
Intrinsic F32Int,
|
||||
Intrinsic F64Int> {
|
||||
Intrinsic F64Int, bit Is2Addr = 1> {
|
||||
// Intrinsic operation, reg.
|
||||
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
|
||||
(outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
|
||||
// Intrinsic operation, mem.
|
||||
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
|
||||
(outs VR128:$dst),
|
||||
(ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
|
||||
// Intrinsic operation, reg.
|
||||
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
|
||||
(outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
|
||||
// Intrinsic operation, mem.
|
||||
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
|
||||
(outs VR128:$dst),
|
||||
(ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
}
|
||||
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
// FP round - roundss, roundps, roundsd, roundpd
|
||||
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
|
||||
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
|
||||
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
|
||||
|
||||
@ -4020,145 +4025,106 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
|
||||
int_x86_sse41_phminposuw>;
|
||||
|
||||
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId128, bit Commutable = 0> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
|
||||
OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId128, bit Is2Addr = 1> {
|
||||
let isCommutable = 1 in
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq",
|
||||
int_x86_sse41_pcmpeqq, 1>;
|
||||
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
|
||||
int_x86_sse41_packusdw, 0>;
|
||||
defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb",
|
||||
int_x86_sse41_pminsb, 1>;
|
||||
defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd",
|
||||
int_x86_sse41_pminsd, 1>;
|
||||
defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud",
|
||||
int_x86_sse41_pminud, 1>;
|
||||
defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw",
|
||||
int_x86_sse41_pminuw, 1>;
|
||||
defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb",
|
||||
int_x86_sse41_pmaxsb, 1>;
|
||||
defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd",
|
||||
int_x86_sse41_pmaxsd, 1>;
|
||||
defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
|
||||
int_x86_sse41_pmaxud, 1>;
|
||||
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
|
||||
int_x86_sse41_pmaxuw, 1>;
|
||||
|
||||
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in
|
||||
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
|
||||
defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
|
||||
defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
|
||||
defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
|
||||
defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
|
||||
defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
|
||||
defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
|
||||
defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
|
||||
defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
|
||||
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
|
||||
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
|
||||
}
|
||||
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
|
||||
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPEQQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
/// SS41I_binop_patint - Simple SSE 4.1 binary operator (SDNode and intrinsic forms)
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
|
||||
SDNode OpNode, Intrinsic IntId128,
|
||||
bit Commutable = 0> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode (OpVT VR128:$src1),
|
||||
VR128:$src2))]>, OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
|
||||
OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
|
||||
def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1, (memop addr:$src2)))]>,
|
||||
OpSize;
|
||||
}
|
||||
}
|
||||
|
||||
/// SS48I_binop_rm - Simple SSE41 binary operator.
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType OpVT, bit Commutable = 0> {
|
||||
ValueType OpVT, bit Is2Addr = 1> {
|
||||
let isCommutable = 1 in
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
|
||||
OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
|
||||
OpSize;
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1,
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1,
|
||||
(bc_v4i32 (memopv2i64 addr:$src2))))]>,
|
||||
OpSize;
|
||||
}
|
||||
OpSize;
|
||||
}
|
||||
|
||||
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
|
||||
|
||||
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId128, bit Commutable = 0> {
|
||||
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
|
||||
OpSize;
|
||||
}
|
||||
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId128, bit Is2Addr = 1> {
|
||||
let isCommutable = 1 in
|
||||
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
|
||||
int_x86_sse41_blendps, 0>;
|
||||
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
|
||||
int_x86_sse41_blendpd, 0>;
|
||||
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
|
||||
int_x86_sse41_pblendw, 0>;
|
||||
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
|
||||
int_x86_sse41_dpps, 1>;
|
||||
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
|
||||
int_x86_sse41_dppd, 1>;
|
||||
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
|
||||
int_x86_sse41_mpsadbw, 0>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in {
|
||||
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
|
||||
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
|
||||
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
|
||||
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
|
||||
}
|
||||
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
|
||||
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
|
||||
}
|
||||
|
||||
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
||||
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
|
||||
|
Loading…
x
Reference in New Issue
Block a user