SSE codegen for vsetcc nodes

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@53719 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nate Begeman 2008-07-17 16:51:19 +00:00
parent bc1f989e3c
commit 30a0de94e7
5 changed files with 209 additions and 43 deletions

View File

@ -744,6 +744,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pcmpeqq : GCCBuiltin<"__builtin_ia32_pcmpeqq">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse42_pcmpgtq : GCCBuiltin<"__builtin_ia32_pcmpgtq">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, Commutative]>;

View File

@ -620,7 +620,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::VSETCC, MVT::v4f32, Legal);
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
}
if (Subtarget->hasSSE2()) {
@ -646,11 +646,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::VSETCC, MVT::v2f64, Legal);
setOperationAction(ISD::VSETCC, MVT::v16i8, Legal);
setOperationAction(ISD::VSETCC, MVT::v8i16, Legal);
setOperationAction(ISD::VSETCC, MVT::v4i32, Legal);
setOperationAction(ISD::VSETCC, MVT::v2i64, Legal);
setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
@ -728,6 +727,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
if (Subtarget->hasSSE42()) {
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@ -4685,6 +4688,113 @@ SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
}
}
SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) {
SDOperand Cond;
SDOperand Op0 = Op.getOperand(0);
SDOperand Op1 = Op.getOperand(1);
SDOperand CC = Op.getOperand(2);
MVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
if (isFP) {
unsigned SSECC = 8;
unsigned Opc = Op0.getValueType() == MVT::v4f32 ? X86ISD::CMPPS :
X86ISD::CMPPD;
bool Swap = false;
switch (SetCCOpcode) {
default: break;
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; // Fallthrough
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
case ISD::SETGE: Swap = true; // Fallthrough
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETONE:
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true;
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETULT: Swap = true;
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the one special case we can't handle, emit two comparisons.
if (SSECC == 8) {
SDOperand UNORD, EQ;
assert(SetCCOpcode == ISD::SETUEQ && "Illegal FP comparison");
UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
return DAG.getNode(ISD::OR, VT, UNORD, EQ);
}
// Handle all other FP comparisons here.
return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
}
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
bool Swap = false, Invert = false, FlipSigns = false;
switch (VT.getSimpleVT()) {
default: break;
case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
}
switch (SetCCOpcode) {
default: break;
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = EQOpc; break;
case ISD::SETLT: Swap = true;
case ISD::SETGT: Opc = GTOpc; break;
case ISD::SETGE: Swap = true;
case ISD::SETLE: Opc = GTOpc; Invert = true; break;
case ISD::SETULT: Swap = true;
case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
}
if (Swap)
std::swap(Op0, Op1);
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
SDOperand SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT);
std::vector<SDOperand> SignBits(VT.getVectorNumElements(), SignBit);
SDOperand SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0],
SignBits.size());
Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec);
Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec);
}
SDOperand Result = DAG.getNode(Opc, VT, Op0, Op1);
// If the logical-not of the result is required, perform that now.
if (Invert) {
MVT EltVT = VT.getVectorElementType();
SDOperand NegOne = DAG.getConstant(EltVT.getIntegerVTBitMask(), EltVT);
std::vector<SDOperand> NegOnes(VT.getVectorNumElements(), NegOne);
SDOperand NegOneV = DAG.getNode(ISD::BUILD_VECTOR, VT, &NegOnes[0],
NegOnes.size());
Result = DAG.getNode(ISD::XOR, VT, Result, NegOneV);
}
return Result;
}
SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
bool addTest = true;
@ -5728,6 +5838,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
@ -5819,6 +5930,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
case X86ISD::CMPPD: return "X86ISD::CMPPD";
case X86ISD::CMPPS: return "X86ISD::CMPPS";
case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
}
}

View File

@ -208,7 +208,14 @@ namespace llvm {
VZEXT_LOAD,
// VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL
VSHL, VSRL,
// CMPPD, CMPPS - Vector double/float comparison.
CMPPD, CMPPS,
// PCMP* - Vector integer comparisons.
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ
};
}
@ -521,6 +528,7 @@ namespace llvm {
SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerVSETCC(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);

View File

@ -215,6 +215,12 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
// X86-64 Instruction templates...
//

View File

@ -20,6 +20,8 @@
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
SDTCisFP<0>, SDTCisInt<2> ]>;
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>]>;
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@ -53,6 +55,16 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad]>;
def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@ -163,22 +175,6 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{
return getI32Imm(N->getValue() >> 3);
}]>;
def SSE_CC_imm : SDNodeXForm<cond, [{
unsigned Val;
switch (N->get()) {
default: Val = 0; assert(0 && "Unexpected CondCode"); break;
case ISD::SETOEQ: Val = 0; break;
case ISD::SETOLT: Val = 1; break;
case ISD::SETOLE: Val = 2; break;
case ISD::SETUO: Val = 3; break;
case ISD::SETONE: Val = 4; break;
case ISD::SETOGE: Val = 5; break;
case ISD::SETOGT: Val = 6; break;
case ISD::SETO: Val = 7; break;
}
return getI8Imm(Val);
}]>;
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
@ -896,10 +892,10 @@ let Constraints = "$src1 = $dst" in {
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
(memop addr:$src), imm:$cc))]>;
}
def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), VR128:$src2, cond:$cc)),
(CMPPSrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), (memop addr:$src2), cond:$cc)),
(CMPPSrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
@ -1725,10 +1721,10 @@ let Constraints = "$src1 = $dst" in {
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
(memop addr:$src), imm:$cc))]>;
}
def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), VR128:$src2, cond:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), (memop addr:$src2), cond:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
def : Pat<(v2i64 (X86cmppd VR128:$src1, VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd VR128:$src1, (memop addr:$src2), imm:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
@ -1994,30 +1990,30 @@ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETEQ)),
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(PCMPEQBrr VR128:$src1, VR128:$src2)>;
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETEQ)),
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
(PCMPEQBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETEQ)),
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
(PCMPEQWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETEQ)),
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
(PCMPEQWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETEQ)),
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
(PCMPEQDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETEQ)),
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
(PCMPEQDrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETGT)),
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
(PCMPGTBrr VR128:$src1, VR128:$src2)>;
def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETGT)),
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
(PCMPGTBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETGT)),
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
(PCMPGTWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETGT)),
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
(PCMPGTWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETGT)),
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
(PCMPGTDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETGT)),
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
(PCMPGTDrm VR128:$src1, addr:$src2)>;
@ -3258,6 +3254,11 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
int_x86_sse41_pmaxuw, 1>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
(PCMPEQQrm VR128:$src1, addr:$src2)>;
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
let Constraints = "$src1 = $dst" in {
@ -3555,3 +3556,30 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
let Constraints = "$src1 = $dst" in {
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Commutable = 0> {
def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
}
defm PCMPGTQ : SS41I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
(PCMPGTQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;