mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-01 15:11:24 +00:00
SSE codegen for vsetcc nodes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@53719 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bc1f989e3c
commit
30a0de94e7
@ -744,6 +744,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse41_pcmpeqq : GCCBuiltin<"__builtin_ia32_pcmpeqq">,
|
||||
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_x86_sse42_pcmpgtq : GCCBuiltin<"__builtin_ia32_pcmpgtq">,
|
||||
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
|
||||
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
@ -620,7 +620,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::VSETCC, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget->hasSSE2()) {
|
||||
@ -646,11 +646,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
|
||||
|
||||
setOperationAction(ISD::VSETCC, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::VSETCC, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::VSETCC, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::VSETCC, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::VSETCC, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
|
||||
setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
|
||||
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
|
||||
@ -728,6 +727,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
}
|
||||
}
|
||||
|
||||
if (Subtarget->hasSSE42()) {
|
||||
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
|
||||
}
|
||||
|
||||
// We want to custom lower some of our intrinsics.
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
@ -4685,6 +4688,113 @@ SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
|
||||
}
|
||||
}
|
||||
|
||||
/// LowerVSETCC - Custom-lower a vector comparison (ISD::VSETCC) into the
/// X86-specific SSE comparison nodes.
///
/// Op's operands are: 0 = left-hand side, 1 = right-hand side, 2 = the
/// condition code (a CondCodeSDNode). The returned node has Op's value type.
///
/// Floating-point comparisons become X86ISD::CMPPS / X86ISD::CMPPD with an i8
/// predicate immediate (0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE,
/// 7=ORD). Integer comparisons become X86ISD::PCMPEQ* / X86ISD::PCMPGT*,
/// optionally with swapped operands, sign-bit-flipped inputs (for unsigned
/// compares) and/or an inverted result.
SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  MVT VT = Op.getValueType();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();

  if (isFP) {
    // 8 is not a valid predicate immediate; it marks the condition codes that
    // cannot be expressed with a single CMPPS/CMPPD.
    unsigned SSECC = 8;
    unsigned Opc = Op0.getValueType() == MVT::v4f32 ? X86ISD::CMPPS :
                                                     X86ISD::CMPPD;
    bool Swap = false;

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETOEQ:
    case ISD::SETEQ:  SSECC = 0; break;
    case ISD::SETOGT:
    case ISD::SETGT: Swap = true; // Fallthrough
    case ISD::SETLT:
    case ISD::SETOLT: SSECC = 1; break;
    case ISD::SETOGE:
    case ISD::SETGE: Swap = true; // Fallthrough
    case ISD::SETLE:
    case ISD::SETOLE: SSECC = 2; break;
    case ISD::SETUO:  SSECC = 3; break;
    // Predicate 4 (NEQ) is also true for unordered inputs, so it implements
    // SETUNE (and the don't-care SETNE), but not SETONE.
    case ISD::SETUNE:
    case ISD::SETNE:  SSECC = 4; break;
    case ISD::SETULE: Swap = true; // Fallthrough
    case ISD::SETUGE: SSECC = 5; break;
    case ISD::SETULT: Swap = true; // Fallthrough
    case ISD::SETUGT: SSECC = 6; break;
    case ISD::SETO:   SSECC = 7; break;
    }
    if (Swap)
      std::swap(Op0, Op1);

    // In the two special cases we can't handle with one instruction, emit two
    // comparisons and combine them.
    if (SSECC == 8) {
      if (SetCCOpcode == ISD::SETONE) {
        // Ordered and not-equal: ORD & NEQ.
        SDOperand ORD, NEQ;
        ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
        NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
        return DAG.getNode(ISD::AND, VT, ORD, NEQ);
      }

      // Unordered or equal: UNORD | EQ.
      SDOperand UNORD, EQ;

      assert(SetCCOpcode == ISD::SETUEQ && "Illegal FP comparison");

      UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
      EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
      return DAG.getNode(ISD::OR, VT, UNORD, EQ);
    }
    // Handle all other FP comparisons here.
    return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
  }

  // We are handling one of the integer comparisons here.  Since SSE only has
  // GT and EQ comparisons for integer, swapping operands and multiple
  // operations may be required for some comparisons.
  unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
  bool Swap = false, Invert = false, FlipSigns = false;

  switch (VT.getSimpleVT()) {
  default: break;
  case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
  case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
  case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
  case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
  }

  switch (SetCCOpcode) {
  default: break;
  case ISD::SETNE:  Invert = true;                      // Fallthrough
  case ISD::SETEQ:  Opc = EQOpc; break;
  case ISD::SETLT:  Swap = true;                        // Fallthrough
  case ISD::SETGT:  Opc = GTOpc; break;
  case ISD::SETGE:  Swap = true;                        // Fallthrough
  case ISD::SETLE:  Opc = GTOpc; Invert = true; break;
  case ISD::SETULT: Swap = true;                        // Fallthrough
  case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
  case ISD::SETUGE: Swap = true;                        // Fallthrough
  case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
  }

  // Opc stays 0 when either the vector type or the condition code was not
  // matched above; building a node with opcode 0 would be meaningless.
  assert(Opc && "Unsupported integer vector comparison");

  if (Swap)
    std::swap(Op0, Op1);

  // Since SSE has no unsigned integer comparisons, we need to flip the sign
  // bits of the inputs before performing those operations.
  if (FlipSigns) {
    MVT EltVT = VT.getVectorElementType();
    SDOperand SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT);
    std::vector<SDOperand> SignBits(VT.getVectorNumElements(), SignBit);
    SDOperand SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0],
                                    SignBits.size());
    Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec);
    Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec);
  }

  SDOperand Result = DAG.getNode(Opc, VT, Op0, Op1);

  // If the logical-not of the result is required, perform that now.
  if (Invert) {
    MVT EltVT = VT.getVectorElementType();
    SDOperand NegOne = DAG.getConstant(EltVT.getIntegerVTBitMask(), EltVT);
    std::vector<SDOperand> NegOnes(VT.getVectorNumElements(), NegOne);
    SDOperand NegOneV = DAG.getNode(ISD::BUILD_VECTOR, VT, &NegOnes[0],
                                    NegOnes.size());
    Result = DAG.getNode(ISD::XOR, VT, Result, NegOneV);
  }
  return Result;
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
|
||||
bool addTest = true;
|
||||
@ -5728,6 +5838,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
case ISD::FNEG: return LowerFNEG(Op, DAG);
|
||||
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
|
||||
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
||||
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
|
||||
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
|
||||
@ -5819,6 +5930,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
|
||||
case X86ISD::VSHL: return "X86ISD::VSHL";
|
||||
case X86ISD::VSRL: return "X86ISD::VSRL";
|
||||
case X86ISD::CMPPD: return "X86ISD::CMPPD";
|
||||
case X86ISD::CMPPS: return "X86ISD::CMPPS";
|
||||
case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
|
||||
case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
|
||||
case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
|
||||
case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
|
||||
case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
|
||||
case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
|
||||
case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
|
||||
case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -208,7 +208,14 @@ namespace llvm {
|
||||
VZEXT_LOAD,
|
||||
|
||||
// VSHL, VSRL - Vector logical left / right shift.
|
||||
VSHL, VSRL
|
||||
VSHL, VSRL,
|
||||
|
||||
// CMPPD, CMPPS - Vector double/float comparison.
|
||||
CMPPD, CMPPS,
|
||||
|
||||
// PCMP* - Vector integer comparisons.
|
||||
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
|
||||
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ
|
||||
};
|
||||
}
|
||||
|
||||
@ -521,6 +528,7 @@ namespace llvm {
|
||||
SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerVSETCC(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
|
||||
|
@ -215,6 +215,12 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
|
||||
|
||||
// SSE4.2 Instruction Templates:
|
||||
//
|
||||
// SS428I - SSE 4.2 instructions with T8 prefix.
|
||||
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
|
||||
|
||||
// X86-64 Instruction templates...
|
||||
//
|
||||
|
@ -20,6 +20,8 @@
|
||||
|
||||
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
|
||||
SDTCisFP<0>, SDTCisInt<2> ]>;
|
||||
// SDTX86VFCMP - Type profile used by the X86ISD::CMPPS / X86ISD::CMPPD nodes:
// one integer result, two floating-point operands of the same type, and an
// i8 third operand (the comparison predicate).
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
|
||||
SDTCisFP<1>, SDTCisVT<3, i8>]>;
|
||||
|
||||
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
|
||||
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
|
||||
@ -53,6 +55,16 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
|
||||
def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
|
||||
def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
|
||||
def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
|
||||
def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
|
||||
def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
|
||||
def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
|
||||
def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
|
||||
def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
|
||||
def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
|
||||
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
|
||||
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE Complex Patterns
|
||||
@ -163,22 +175,6 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{
|
||||
return getI32Imm(N->getValue() >> 3);
|
||||
}]>;
|
||||
|
||||
def SSE_CC_imm : SDNodeXForm<cond, [{
|
||||
unsigned Val;
|
||||
switch (N->get()) {
|
||||
default: Val = 0; assert(0 && "Unexpected CondCode"); break;
|
||||
case ISD::SETOEQ: Val = 0; break;
|
||||
case ISD::SETOLT: Val = 1; break;
|
||||
case ISD::SETOLE: Val = 2; break;
|
||||
case ISD::SETUO: Val = 3; break;
|
||||
case ISD::SETONE: Val = 4; break;
|
||||
case ISD::SETOGE: Val = 5; break;
|
||||
case ISD::SETOGT: Val = 6; break;
|
||||
case ISD::SETO: Val = 7; break;
|
||||
}
|
||||
return getI8Imm(Val);
|
||||
}]>;
|
||||
|
||||
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
|
||||
// SHUFP* etc. imm.
|
||||
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
|
||||
@ -896,10 +892,10 @@ let Constraints = "$src1 = $dst" in {
|
||||
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
|
||||
(memop addr:$src), imm:$cc))]>;
|
||||
}
|
||||
def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), VR128:$src2, cond:$cc)),
|
||||
(CMPPSrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
|
||||
def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), (memop addr:$src2), cond:$cc)),
|
||||
(CMPPSrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
|
||||
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
|
||||
(CMPPSrri VR128:$src1, VR128:$src2, imm:$cc)>;
|
||||
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
|
||||
(CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
|
||||
|
||||
// Shuffle and unpack instructions
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
@ -1725,10 +1721,10 @@ let Constraints = "$src1 = $dst" in {
|
||||
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
|
||||
(memop addr:$src), imm:$cc))]>;
|
||||
}
|
||||
def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), VR128:$src2, cond:$cc)),
|
||||
(CMPPDrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
|
||||
def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), (memop addr:$src2), cond:$cc)),
|
||||
(CMPPDrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
|
||||
def : Pat<(v2i64 (X86cmppd VR128:$src1, VR128:$src2, imm:$cc)),
|
||||
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
|
||||
def : Pat<(v2i64 (X86cmppd VR128:$src1, (memop addr:$src2), imm:$cc)),
|
||||
(CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
|
||||
|
||||
// Shuffle and unpack instructions
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
@ -1994,30 +1990,30 @@ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
|
||||
defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
|
||||
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
|
||||
|
||||
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETEQ)),
|
||||
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
|
||||
(PCMPEQBrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETEQ)),
|
||||
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPEQBrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETEQ)),
|
||||
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
|
||||
(PCMPEQWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETEQ)),
|
||||
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPEQWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETEQ)),
|
||||
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
|
||||
(PCMPEQDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETEQ)),
|
||||
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPEQDrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETGT)),
|
||||
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
|
||||
(PCMPGTBrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETGT)),
|
||||
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPGTBrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETGT)),
|
||||
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
|
||||
(PCMPGTWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETGT)),
|
||||
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPGTWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETGT)),
|
||||
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
|
||||
(PCMPGTDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETGT)),
|
||||
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPGTDrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
|
||||
@ -3258,6 +3254,11 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
|
||||
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
|
||||
int_x86_sse41_pmaxuw, 1>;
|
||||
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
|
||||
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPEQQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
|
||||
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
@ -3555,3 +3556,30 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
|
||||
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"movntdqa\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
|
||||
|
||||
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId128, bit Commutable = 0> {
|
||||
def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
|
||||
OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
}
|
||||
|
||||
// PCMPGTQ is an SSE4.2 instruction: instantiate it from the SSE4.2 multiclass
// so that it is predicated on HasSSE42 rather than HasSSE41.
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
|
||||
|
||||
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
|
||||
(PCMPGTQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPGTQrm VR128:$src1, addr:$src2)>;
|
||||
|
Loading…
Reference in New Issue
Block a user