From 6be2c58c8c4d2b8dede9e3d6920a18f04164388b Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 5 Apr 2006 23:38:46 +0000 Subject: [PATCH] Support for comi / ucomi intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27444 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 133 +++++++++++++++++++++++++++-- lib/Target/X86/X86ISelLowering.h | 2 +- lib/Target/X86/X86InstrSSE.td | 33 ++++++- 3 files changed, 158 insertions(+), 10 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 976286678da..c13d498d5e6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19,6 +19,7 @@ #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/Intrinsics.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -323,6 +324,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); } + // We want to custom lower some of our intrinsics. + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + computeRegisterProperties(); // FIXME: These should be based on subtarget info. Plus, the values should @@ -1185,9 +1189,8 @@ static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { /// specific condition code. It returns a false if it cannot do a direct /// translation. X86CC is the translated CondCode. Flip is set to true if the /// the order of comparison operands should be flipped. -static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, - bool &Flip) { - ISD::CondCode SetCCOpcode = cast(CC)->get(); +static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, + unsigned &X86CC, bool &Flip) { Flip = false; X86CC = X86ISD::COND_INVALID; if (!isFP) { @@ -1237,6 +1240,11 @@ static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, return X86CC != X86ISD::COND_INVALID; } +static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, + bool &Flip) { + return translateX86CC(cast(CC)->get(), isFP, X86CC, Flip); +} + /// hasFPCMov - is there a floating point cmov for the specific X86 condition /// code. Current x86 isa includes the following FP cmov instructions: /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. @@ -2146,7 +2154,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { // If the X86ISD::SETCC has more than one use, then it's probably better // to use a test instead of duplicating the X86ISD::CMP (for register // pressure reason). - if (Op0.getOperand(1).getOpcode() == X86ISD::CMP) { + unsigned CmpOpc = Op0.getOperand(1).getOpcode(); + if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || + CmpOpc == X86ISD::UCOMI) { if (!Op0.hasOneUse()) { std::vector Tys; for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) @@ -2160,7 +2170,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { CC = Op0.getOperand(0); Cond = Op0.getOperand(1); // Make a copy as flag result cannot be used by more than one. - Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, + Cond = DAG.getNode(CmpOpc, MVT::Flag, Cond.getOperand(0), Cond.getOperand(1)); addTest = isFPStack && !hasFPCMov(cast(CC)->getSignExtended()); @@ -2201,7 +2211,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { // If the X86ISD::SETCC has more than one use, then it's probably better // to use a test instead of duplicating the X86ISD::CMP (for register // pressure reason). - if (Cond.getOperand(1).getOpcode() == X86ISD::CMP) { + unsigned CmpOpc = Cond.getOperand(1).getOpcode(); + if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || + CmpOpc == X86ISD::UCOMI) { if (!Cond.hasOneUse()) { std::vector Tys; for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) @@ -2215,7 +2227,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { CC = Cond.getOperand(0); Cond = Cond.getOperand(1); // Make a copy as flag result cannot be used by more than one. - Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, + Cond = DAG.getNode(CmpOpc, MVT::Flag, Cond.getOperand(0), Cond.getOperand(1)); } else addTest = true; @@ -2829,6 +2841,111 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { return SDOperand(); } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(Op.getOperand(0))->getValue(); + switch (IntNo) { + default: return SDOperand(); // Don't custom lower most intrinsics. + // Comparison intrinsics. + case Intrinsic::x86_sse_comieq_ss: + case Intrinsic::x86_sse_comilt_ss: + case Intrinsic::x86_sse_comile_ss: + case Intrinsic::x86_sse_comigt_ss: + case Intrinsic::x86_sse_comige_ss: + case Intrinsic::x86_sse_comineq_ss: + case Intrinsic::x86_sse_ucomieq_ss: + case Intrinsic::x86_sse_ucomilt_ss: + case Intrinsic::x86_sse_ucomile_ss: + case Intrinsic::x86_sse_ucomigt_ss: + case Intrinsic::x86_sse_ucomige_ss: + case Intrinsic::x86_sse_ucomineq_ss: + case Intrinsic::x86_sse2_comieq_sd: + case Intrinsic::x86_sse2_comilt_sd: + case Intrinsic::x86_sse2_comile_sd: + case Intrinsic::x86_sse2_comigt_sd: + case Intrinsic::x86_sse2_comige_sd: + case Intrinsic::x86_sse2_comineq_sd: + case Intrinsic::x86_sse2_ucomieq_sd: + case Intrinsic::x86_sse2_ucomilt_sd: + case Intrinsic::x86_sse2_ucomile_sd: + case Intrinsic::x86_sse2_ucomigt_sd: + case Intrinsic::x86_sse2_ucomige_sd: + case Intrinsic::x86_sse2_ucomineq_sd: { + unsigned Opc; + ISD::CondCode CC; + switch (IntNo) { + default: break; + case Intrinsic::x86_sse_comieq_ss: + case Intrinsic::x86_sse2_comieq_sd: + Opc = X86ISD::COMI; + CC = ISD::SETEQ; + break; + case Intrinsic::x86_sse_comilt_ss: + case Intrinsic::x86_sse2_comilt_sd: + Opc = X86ISD::COMI; + CC = ISD::SETLT; + break; + case Intrinsic::x86_sse_comile_ss: + case Intrinsic::x86_sse2_comile_sd: + Opc = X86ISD::COMI; + CC = ISD::SETLE; + break; + case Intrinsic::x86_sse_comigt_ss: + case Intrinsic::x86_sse2_comigt_sd: + Opc = X86ISD::COMI; + CC = ISD::SETGT; + break; + case Intrinsic::x86_sse_comige_ss: + case Intrinsic::x86_sse2_comige_sd: + Opc = X86ISD::COMI; + CC = ISD::SETGE; + break; + case Intrinsic::x86_sse_comineq_ss: + case Intrinsic::x86_sse2_comineq_sd: + Opc = X86ISD::COMI; + CC = ISD::SETNE; + break; + case Intrinsic::x86_sse_ucomieq_ss: + case Intrinsic::x86_sse2_ucomieq_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETEQ; + break; + case Intrinsic::x86_sse_ucomilt_ss: + case Intrinsic::x86_sse2_ucomilt_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETLT; + break; + case Intrinsic::x86_sse_ucomile_ss: + case Intrinsic::x86_sse2_ucomile_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETLE; + break; + case Intrinsic::x86_sse_ucomigt_ss: + case Intrinsic::x86_sse2_ucomigt_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETGT; + break; + case Intrinsic::x86_sse_ucomige_ss: + case Intrinsic::x86_sse2_ucomige_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETGE; + break; + case Intrinsic::x86_sse_ucomineq_ss: + case Intrinsic::x86_sse2_ucomineq_sd: + Opc = X86ISD::UCOMI; + CC = ISD::SETNE; + break; + } + bool Flip; + unsigned X86CC; + translateX86CC(CC, true, X86CC, Flip); + SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), + Op.getOperand(Flip?1:2)); + SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), Cond); + return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); + } + } + } } } @@ -2853,6 +2970,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; case X86ISD::CMP: return "X86ISD::CMP"; case X86ISD::TEST: return "X86ISD::TEST"; + case X86ISD::COMI: return "X86ISD::COMI"; + case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ddb2d307ba9..aec0cd8993e 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -106,7 +106,7 @@ namespace llvm { RDTSC_DAG, /// X86 compare and logical compare instructions. - CMP, TEST, + CMP, TEST, COMI, UCOMI, /// X86 SetCC. Operand 1 is condition code, and operand 2 is the flag /// operand produced by a CMP instruction. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index bdf5fe2de85..989daee33e0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -19,10 +19,14 @@ def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>; -def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, +def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; -def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, +def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; +def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest, + [SDNPOutFlag]>; +def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest, + [SDNPOutFlag]>; def X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>; def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC", @@ -559,6 +563,31 @@ def Int_CMPSDrm : SDI<0xC2, MRMSrcMem, "cmp${cc}sd {$src, $dst|$dst, $src}", []>; } +def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops VR128:$src1, VR128:$src2), + "ucomiss {$src2, $src1|$src1, $src2}", + [(X86ucomi (v4f32 VR128:$src1), VR128:$src2)]>; +def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops VR128:$src1, f128mem:$src2), + "ucomiss {$src2, $src1|$src1, $src2}", + [(X86ucomi (v4f32 VR128:$src1), (loadv4f32 addr:$src2))]>; +def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops VR128:$src1, VR128:$src2), + "ucomisd {$src2, $src1|$src1, $src2}", + [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>; +def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops VR128:$src1, f128mem:$src2), + "ucomisd {$src2, $src1|$src1, $src2}", + [(X86ucomi (v2f64 VR128:$src1), (loadv2f64 addr:$src2))]>; + +def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (ops VR128:$src1, VR128:$src2), + "comiss {$src2, $src1|$src1, $src2}", + [(X86comi (v4f32 VR128:$src1), VR128:$src2)]>; +def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2), + "comiss {$src2, $src1|$src1, $src2}", + [(X86comi (v4f32 VR128:$src1), (loadv4f32 addr:$src2))]>; +def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (ops VR128:$src1, VR128:$src2), + "comisd {$src2, $src1|$src1, $src2}", + [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>; +def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2), + "comisd {$src2, $src1|$src1, $src2}", + [(X86comi (v2f64 VR128:$src1), (loadv2f64 addr:$src2))]>; // Aliases of packed instructions for scalar use. These all have names that // start with 'Fs'.