mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-18 13:34:04 +00:00
[X86][XOP] Enable commutation for XOP instructions
Patch to allow XOP instructions (integer comparison and integer multiply-add) to be commuted. The comparison instructions sometimes require the compare mode to be flipped, but the remaining instructions can use the default commutation modes. This patch also sets the SSE execution domains of all the XOP instructions.

Differential Revision: http://reviews.llvm.org/D7646

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229267 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
32f60795f5
commit
6d5ee8a8b5
@ -2906,6 +2906,32 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case X86::VPCOMBri: case X86::VPCOMUBri:
|
||||||
|
case X86::VPCOMDri: case X86::VPCOMUDri:
|
||||||
|
case X86::VPCOMQri: case X86::VPCOMUQri:
|
||||||
|
case X86::VPCOMWri: case X86::VPCOMUWri: {
|
||||||
|
// Flip comparison mode immediate (if necessary).
|
||||||
|
unsigned Imm = MI->getOperand(3).getImm() & 0x7;
|
||||||
|
switch (Imm) {
|
||||||
|
case 0x00: Imm = 0x02; break; // LT -> GT
|
||||||
|
case 0x01: Imm = 0x03; break; // LE -> GE
|
||||||
|
case 0x02: Imm = 0x00; break; // GT -> LT
|
||||||
|
case 0x03: Imm = 0x01; break; // GE -> LE
|
||||||
|
case 0x04: // EQ
|
||||||
|
case 0x05: // NE
|
||||||
|
case 0x06: // FALSE
|
||||||
|
case 0x07: // TRUE
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (NewMI) {
|
||||||
|
MachineFunction &MF = *MI->getParent()->getParent();
|
||||||
|
MI = MF.CloneMachineInstr(MI);
|
||||||
|
NewMI = false;
|
||||||
|
}
|
||||||
|
MI->getOperand(3).setImm(Imm);
|
||||||
|
return TargetInstrInfo::commuteInstruction(MI, NewMI);
|
||||||
|
}
|
||||||
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
|
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
|
||||||
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
|
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
|
||||||
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
|
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
|
||||||
|
@ -20,21 +20,23 @@ multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
|
|||||||
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP;
|
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
|
let ExeDomain = SSEPackedInt in {
|
||||||
defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
|
defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
|
||||||
defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
|
defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
|
||||||
defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
|
defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
|
||||||
defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
|
defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
|
||||||
defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
|
defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
|
||||||
defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
|
defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
|
||||||
defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
|
defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
|
||||||
defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
|
defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
|
||||||
defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
|
defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
|
||||||
defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
|
defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
|
||||||
defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
|
defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
|
||||||
defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
|
defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
|
||||||
defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
|
defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
|
||||||
defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
|
defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
|
||||||
|
defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
|
||||||
|
}
|
||||||
|
|
||||||
// Scalar load 2 addr operand instructions
|
// Scalar load 2 addr operand instructions
|
||||||
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
||||||
@ -47,11 +49,6 @@ multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
|||||||
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP;
|
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
|
|
||||||
ssmem, sse_load_f32>;
|
|
||||||
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
|
|
||||||
sdmem, sse_load_f64>;
|
|
||||||
|
|
||||||
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
||||||
PatFrag memop> {
|
PatFrag memop> {
|
||||||
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
@ -62,9 +59,6 @@ multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
|||||||
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP;
|
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32>;
|
|
||||||
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64>;
|
|
||||||
|
|
||||||
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
||||||
PatFrag memop> {
|
PatFrag memop> {
|
||||||
def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
|
def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
|
||||||
@ -75,8 +69,19 @@ multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
|||||||
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L;
|
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32>;
|
let ExeDomain = SSEPackedSingle in {
|
||||||
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64>;
|
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
|
||||||
|
ssmem, sse_load_f32>;
|
||||||
|
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32>;
|
||||||
|
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ExeDomain = SSEPackedDouble in {
|
||||||
|
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
|
||||||
|
sdmem, sse_load_f64>;
|
||||||
|
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64>;
|
||||||
|
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64>;
|
||||||
|
}
|
||||||
|
|
||||||
multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||||
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
@ -97,18 +102,20 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
|||||||
XOP_4VOp3;
|
XOP_4VOp3;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
|
let ExeDomain = SSEPackedInt in {
|
||||||
defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
|
defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
|
||||||
defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
|
defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
|
||||||
defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
|
defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
|
||||||
defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
|
defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
|
||||||
defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
|
defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
|
||||||
defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
|
defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
|
||||||
defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
|
defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
|
||||||
defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
|
defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
|
||||||
defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
|
defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
|
||||||
defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
|
defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
|
||||||
defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
|
defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
|
||||||
|
defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
|
||||||
|
}
|
||||||
|
|
||||||
multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||||
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
@ -122,13 +129,16 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
|||||||
(Int (bitconvert (loadv2i64 addr:$src1)), imm:$src2))]>, XOP;
|
(Int (bitconvert (loadv2i64 addr:$src1)), imm:$src2))]>, XOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
|
let ExeDomain = SSEPackedInt in {
|
||||||
defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
|
defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
|
||||||
defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
|
defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
|
||||||
defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
|
defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
|
||||||
|
defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
|
||||||
|
}
|
||||||
|
|
||||||
// Instruction where second source can be memory, but third must be register
|
// Instruction where second source can be memory, but third must be register
|
||||||
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||||
|
let isCommutable = 1 in
|
||||||
def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
@ -144,21 +154,24 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
|||||||
VR128:$src3))]>, XOP_4V, VEX_I8IMM;
|
VR128:$src3))]>, XOP_4V, VEX_I8IMM;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
|
let ExeDomain = SSEPackedInt in {
|
||||||
defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
|
defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
|
||||||
defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
|
defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
|
||||||
defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
|
defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
|
||||||
defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
|
defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
|
||||||
defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
|
defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
|
||||||
defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
|
defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
|
||||||
defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
|
defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
|
||||||
defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
|
defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
|
||||||
defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
|
defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
|
||||||
defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
|
defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
|
||||||
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
|
defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
|
||||||
|
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
|
||||||
|
}
|
||||||
|
|
||||||
// Instruction where second source can be memory, third must be imm8
|
// Instruction where second source can be memory, third must be imm8
|
||||||
multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
|
multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
|
||||||
|
let isCommutable = 1 in
|
||||||
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
|
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
|
||||||
!strconcat("vpcom${cc}", Suffix,
|
!strconcat("vpcom${cc}", Suffix,
|
||||||
@ -187,14 +200,16 @@ multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPCOMB : xopvpcom<0xCC, "b", int_x86_xop_vpcomb>;
|
let ExeDomain = SSEPackedInt in { // SSE integer instructions
|
||||||
defm VPCOMW : xopvpcom<0xCD, "w", int_x86_xop_vpcomw>;
|
defm VPCOMB : xopvpcom<0xCC, "b", int_x86_xop_vpcomb>;
|
||||||
defm VPCOMD : xopvpcom<0xCE, "d", int_x86_xop_vpcomd>;
|
defm VPCOMW : xopvpcom<0xCD, "w", int_x86_xop_vpcomw>;
|
||||||
defm VPCOMQ : xopvpcom<0xCF, "q", int_x86_xop_vpcomq>;
|
defm VPCOMD : xopvpcom<0xCE, "d", int_x86_xop_vpcomd>;
|
||||||
defm VPCOMUB : xopvpcom<0xEC, "ub", int_x86_xop_vpcomub>;
|
defm VPCOMQ : xopvpcom<0xCF, "q", int_x86_xop_vpcomq>;
|
||||||
defm VPCOMUW : xopvpcom<0xED, "uw", int_x86_xop_vpcomuw>;
|
defm VPCOMUB : xopvpcom<0xEC, "ub", int_x86_xop_vpcomub>;
|
||||||
defm VPCOMUD : xopvpcom<0xEE, "ud", int_x86_xop_vpcomud>;
|
defm VPCOMUW : xopvpcom<0xED, "uw", int_x86_xop_vpcomuw>;
|
||||||
defm VPCOMUQ : xopvpcom<0xEF, "uq", int_x86_xop_vpcomuq>;
|
defm VPCOMUD : xopvpcom<0xEE, "ud", int_x86_xop_vpcomud>;
|
||||||
|
defm VPCOMUQ : xopvpcom<0xEF, "uq", int_x86_xop_vpcomuq>;
|
||||||
|
}
|
||||||
|
|
||||||
// Instruction where either second or third source can be memory
|
// Instruction where either second or third source can be memory
|
||||||
multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||||
@ -222,8 +237,10 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
|||||||
XOP_4V, VEX_I8IMM;
|
XOP_4V, VEX_I8IMM;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
|
let ExeDomain = SSEPackedInt in {
|
||||||
defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
|
defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
|
||||||
|
defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
|
||||||
|
}
|
||||||
|
|
||||||
multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||||
def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
|
def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
|
||||||
@ -250,7 +267,8 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
|||||||
XOP_4V, VEX_I8IMM, VEX_L;
|
XOP_4V, VEX_I8IMM, VEX_L;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
|
let ExeDomain = SSEPackedInt in
|
||||||
|
defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
|
||||||
|
|
||||||
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
|
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
|
||||||
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
|
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
|
||||||
@ -295,8 +313,11 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
|
|||||||
VEX_L;
|
VEX_L;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
|
let ExeDomain = SSEPackedDouble in
|
||||||
int_x86_xop_vpermil2pd_256, loadv2f64, loadv4f64>;
|
defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
|
||||||
defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
|
int_x86_xop_vpermil2pd_256, loadv2f64, loadv4f64>;
|
||||||
int_x86_xop_vpermil2ps_256, loadv4f32, loadv8f32>;
|
|
||||||
|
let ExeDomain = SSEPackedSingle in
|
||||||
|
defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
|
||||||
|
int_x86_xop_vpermil2ps_256, loadv4f32, loadv8f32>;
|
||||||
|
|
||||||
|
184
test/CodeGen/X86/commute-xop.ll
Normal file
184
test/CodeGen/X86/commute-xop.ll
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
; RUN: llc -O3 -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+xop < %s | FileCheck %s
|
||||||
|
|
||||||
|
define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomb
|
||||||
|
;CHECK: vpcomgtb (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <16 x i8>* %a0
|
||||||
|
%2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %1, <16 x i8> %a1, i8 0) ; vpcomltb
|
||||||
|
ret <16 x i8> %2
|
||||||
|
}
|
||||||
|
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomd
|
||||||
|
;CHECK: vpcomged (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %1, <4 x i32> %a1, i8 1) ; vpcomled
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomq
|
||||||
|
;CHECK: vpcomltq (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <2 x i64>* %a0
|
||||||
|
%2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %1, <2 x i64> %a1, i8 2) ; vpcomgtq
|
||||||
|
ret <2 x i64> %2
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomub
|
||||||
|
;CHECK: vpcomleub (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <16 x i8>* %a0
|
||||||
|
%2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %1, <16 x i8> %a1, i8 3) ; vpcomgeub
|
||||||
|
ret <16 x i8> %2
|
||||||
|
}
|
||||||
|
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomud
|
||||||
|
;CHECK: vpcomequd (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomuq
|
||||||
|
;CHECK: vpcomnequq (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <2 x i64>* %a0
|
||||||
|
%2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
|
||||||
|
ret <2 x i64> %2
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomuw
|
||||||
|
;CHECK: vpcomfalseuw (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
|
||||||
|
ret <8 x i16> %2
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpcomw
|
||||||
|
;CHECK: vpcomtruew (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
|
||||||
|
ret <8 x i16> %2
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacsdd
|
||||||
|
;CHECK: vpmacsdd %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacsdqh
|
||||||
|
;CHECK: vpmacsdqh %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
|
||||||
|
ret <2 x i64> %2
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacsdql
|
||||||
|
;CHECK: vpmacsdql %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
|
||||||
|
ret <2 x i64> %2
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacssdd
|
||||||
|
;CHECK: vpmacssdd %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacssdqh
|
||||||
|
;CHECK: vpmacssdqh %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
|
||||||
|
ret <2 x i64> %2
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacssdql
|
||||||
|
;CHECK: vpmacssdql %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <4 x i32>* %a0
|
||||||
|
%2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
|
||||||
|
ret <2 x i64> %2
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacsswd
|
||||||
|
;CHECK: vpmacsswd %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
|
define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacssww
|
||||||
|
;CHECK: vpmacssww %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
|
||||||
|
ret <8 x i16> %2
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacswd
|
||||||
|
;CHECK: vpmacswd %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
|
define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmacsww
|
||||||
|
;CHECK: vpmacsww %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
|
||||||
|
ret <8 x i16> %2
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmadcsswd
|
||||||
|
;CHECK: vpmadcsswd %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
|
||||||
|
;CHECK-LABEL: commute_fold_vpmadcswd
|
||||||
|
;CHECK: vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
|
||||||
|
%1 = load <8 x i16>* %a0
|
||||||
|
%2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user