From fbad25e12073e2cbe192b2c4cc4f0bbb26148c9c Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Sun, 11 Sep 2011 15:02:23 +0000
Subject: [PATCH] CR fixes per Bruno's request.

Undo the changes from r139285 which added custom lowering to vselect.
Add tablegen lowering for vselect.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139479 91177308-0d34-0410-b5e6-96231b3b80d8
---
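A short illustration, placed below the cut so that "git am" keeps it out of the
commit message: the vselect-to-blendv patterns below rely on the blendv
semantics, where each result element comes from the second source when the sign
bit of the corresponding mask element is set, and from the first source
otherwise. The C++ sketch below uses the SSE4.1 intrinsics only to demonstrate
that behavior; it is not part of the patch, and the file name and constant
values are made up for the example. Build with, e.g., -msse4.1.

    // blendv_demo.cpp - illustrates the per-lane select that (V)BLENDVPS
    // performs, which is roughly what a legal ISD::VSELECT with a
    // compare-produced mask is now pattern-matched to in X86InstrSSE.td.
    #include <smmintrin.h>   // SSE4.1: _mm_blendv_ps
    #include <cstdio>

    int main() {
      __m128 a    = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);   // lanes: 1 2 3 4
      __m128 b    = _mm_set_ps(8.0f, 7.0f, 6.0f, 5.0f);   // lanes: 5 6 7 8
      // A compare-style mask: all-ones lanes (sign bit set) select from b.
      __m128 mask = _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, 0));
      __m128 r    = _mm_blendv_ps(a, b, mask); // lane i = signbit(mask[i]) ? b[i] : a[i]

      float out[4];
      _mm_storeu_ps(out, r);
      std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 6 3 8
      return 0;
    }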
 include/llvm/Target/TargetSelectionDAG.td |  6 +-
 lib/Target/X86/X86ISelLowering.cpp        | 67 +++++-----------------
 lib/Target/X86/X86ISelLowering.h          |  1 -
 lib/Target/X86/X86InstrSSE.td             | 70 +++++++++++------------
 4 files changed, 51 insertions(+), 93 deletions(-)

diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 91db08d83d2..612635ea746 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -149,6 +149,10 @@ def SDTSelect : SDTypeProfile<1, 3, [       // select
   SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>
 ]>;
 
+def SDTVSelect : SDTypeProfile<1, 3, [      // vselect
+  SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>
+]>;
+
 def SDTSelectCC : SDTypeProfile<1, 5, [     // select_cc
   SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisSameAs<0, 3>,
   SDTCisVT<5, OtherVT>
 ]>;
@@ -390,8 +394,8 @@ def f32_to_f16 : SDNode<"ISD::FP32_TO_FP16", SDTFPToIntOp>;
 
 def setcc      : SDNode<"ISD::SETCC"      , SDTSetCC>;
 def select     : SDNode<"ISD::SELECT"     , SDTSelect>;
+def vselect    : SDNode<"ISD::VSELECT"    , SDTVSelect>;
 def selectcc   : SDNode<"ISD::SELECT_CC"  , SDTSelectCC>;
-def vsetcc     : SDNode<"ISD::VSETCC"     , SDTSetCC>;
 
 def brcond     : SDNode<"ISD::BRCOND"     , SDTBrcond, [SDNPHasChain]>;
 def brind      : SDNode<"ISD::BRIND"      , SDTBrind,  [SDNPHasChain]>;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dbf9ef01932..a3fb00af1ea 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -917,11 +917,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SHL,       MVT::v4i32, Custom);
     setOperationAction(ISD::SHL,       MVT::v16i8, Custom);
 
-    setOperationAction(ISD::VSELECT,   MVT::v2f64, Custom);
-    setOperationAction(ISD::VSELECT,   MVT::v2i64, Custom);
-    setOperationAction(ISD::VSELECT,   MVT::v16i8, Custom);
-    setOperationAction(ISD::VSELECT,   MVT::v4i32, Custom);
-    setOperationAction(ISD::VSELECT,   MVT::v4f32, Custom);
+    setOperationAction(ISD::VSELECT,   MVT::v2f64, Legal);
+    setOperationAction(ISD::VSELECT,   MVT::v2i64, Legal);
+    setOperationAction(ISD::VSELECT,   MVT::v16i8, Legal);
+    setOperationAction(ISD::VSELECT,   MVT::v4i32, Legal);
+    setOperationAction(ISD::VSELECT,   MVT::v4f32, Legal);
 
     // i8 and i16 vectors are custom , because the source register and source
     // memory operand types are not the same width.  f32 vectors are
@@ -1019,10 +1019,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SELECT,    MVT::v4i64, Custom);
     setOperationAction(ISD::SELECT,    MVT::v8f32, Custom);
 
-    setOperationAction(ISD::VSELECT,   MVT::v4f64, Custom);
-    setOperationAction(ISD::VSELECT,   MVT::v4i64, Custom);
-    setOperationAction(ISD::VSELECT,   MVT::v8i32, Custom);
-    setOperationAction(ISD::VSELECT,   MVT::v8f32, Custom);
+    setOperationAction(ISD::VSELECT,   MVT::v4f64, Legal);
+    setOperationAction(ISD::VSELECT,   MVT::v4i64, Legal);
+    setOperationAction(ISD::VSELECT,   MVT::v8i32, Legal);
+    setOperationAction(ISD::VSELECT,   MVT::v8f32, Legal);
 
     setOperationAction(ISD::ADD,       MVT::v4i64, Custom);
     setOperationAction(ISD::ADD,       MVT::v8i32, Custom);
@@ -8703,43 +8703,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
 }
 
-SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
-  SDValue Cond = Op.getOperand(0);
-  SDValue Op1 = Op.getOperand(1);
-  SDValue Op2 = Op.getOperand(2);
-  DebugLoc DL = Op.getDebugLoc();
-
-  SDValue Ops[] = {Op1, Op2, Cond};
-
-  assert(Op1.getValueType().isVector() && "Op1 must be a vector");
-  assert(Op2.getValueType().isVector() && "Op2 must be a vector");
-  assert(Cond.getValueType().isVector() && "Cond must be a vector");
-  assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch");
-
-  EVT VT = Op1.getValueType();
-  switch (VT.getSimpleVT().SimpleTy) {
-  default: break;
-  // SSE4:
-  case MVT::v2i64:
-  case MVT::v2f64:
-  case MVT::v4i32:
-  case MVT::v4f32:
-  case MVT::v16i8:
-  case MVT::v8i16:
-  // AVX:
-  case MVT::v4i64:
-  case MVT::v4f64:
-  case MVT::v8i32:
-  case MVT::v8f32:
-  case MVT::v32i8:
-  case MVT::v16i16:
-    return DAG.getNode(X86ISD::BLENDV, DL, VT, Ops, array_lengthof(Ops));
-  }
-
-  return SDValue();
-}
-
-
 // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
 // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
 // from the AND / OR.
@@ -9993,7 +9956,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
     M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
                     DAG.getConstant(4, MVT::i32));
-    R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op);
+    R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
     // a += a
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
 
@@ -10008,13 +9971,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
     M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
                     DAG.getConstant(2, MVT::i32));
-    R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op);
+    R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
     // a += a
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
 
     // return pblendv(r, r+r, a);
-    R = DAG.getNode(X86ISD::BLENDV, dl, VT,
-                    R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+    R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
+                    R, DAG.getNode(ISD::ADD, dl, VT, R, R));
     return R;
   }
   return SDValue();
@@ -10406,7 +10369,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
   case ISD::SETCC:              return LowerSETCC(Op, DAG);
   case ISD::SELECT:             return LowerSELECT(Op, DAG);
-  case ISD::VSELECT:            return LowerVSELECT(Op, DAG);
   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
   case ISD::VASTART:            return LowerVASTART(Op, DAG);
@@ -10651,7 +10613,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PSIGNB:             return "X86ISD::PSIGNB";
   case X86ISD::PSIGNW:             return "X86ISD::PSIGNW";
   case X86ISD::PSIGND:             return "X86ISD::PSIGND";
-  case X86ISD::BLENDV:             return "X86ISD::BLENDV";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
   case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
@@ -13381,7 +13342,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
       X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
       Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
       Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
-      Mask = DAG.getNode(X86ISD::BLENDV, DL, MVT::v16i8, X, Y, Mask);
+      Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
       return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
     }
   }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index bd04de150d9..408d78f2d8f 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -809,7 +809,6 @@ namespace llvm {
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index cf363354c78..ef64be3005e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5869,36 +5869,27 @@ defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
 
 let Predicates = [HasAVX] in {
 def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2),
-                 VR128:$mask)),
-                 (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
-def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2),
-                 VR128:$mask)),
-                 (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
-def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2),
-                 VR128:$mask)),
-                 (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
-def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2),
-                 VR128:$mask)),
-                 (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
-def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2),
-                 VR128:$mask)),
-                 (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+                 VR128:$mask)), (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
+                 (v16i8 VR128:$src2))), (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
+                 (v4i32 VR128:$src2))), (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
+                 (v4f32 VR128:$src2))), (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
+                 (v2i64 VR128:$src2))), (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
+                 (v2f64 VR128:$src2))), (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
+                 (v8i32 VR256:$src2))), (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
+                 (v8f32 VR256:$src2))), (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
+                 (v4i64 VR256:$src2))), (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
+                 (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
 
-def : Pat<(v8i32 (X86blendv (v8i32 VR256:$src1), (v8i32 VR256:$src2),
-                 VR256:$mask)),
-                 (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
-def : Pat<(v8f32 (X86blendv (v8f32 VR256:$src1), (v8f32 VR256:$src2),
-                 VR256:$mask)),
-                 (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
-
-
-def : Pat<(v4i64 (X86blendv (v4i64 VR256:$src1), (v4i64 VR256:$src2),
-                 VR256:$mask)),
-                 (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
-def : Pat<(v4f64 (X86blendv (v4f64 VR256:$src1), (v4f64 VR256:$src2),
-                 VR256:$mask)),
-                 (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
 }
 
 /// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -5926,16 +5917,19 @@ defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
 defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
 
 let Predicates = [HasSSE41] in {
-  def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2), XMM0)),
-            (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
-  def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2), XMM0)),
-            (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
-  def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2), XMM0)),
-            (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
-  def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2), XMM0)),
-            (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
-  def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2), XMM0)),
-            (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2),
+                 VR128:$mask)), (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+
+  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
+                   (v16i8 VR128:$src2))), (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
+                   (v4i32 VR128:$src2))), (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
+                   (v4f32 VR128:$src2))), (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
+                   (v2i64 VR128:$src2))), (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
+                   (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
 }
 
 let Predicates = [HasAVX] in