diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index de0f6ce26d9..8237ef3779b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4346,6 +4346,28 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + // Treat SETCC as a vector mask and promote the result type based on the + // target's expected SETCC result type. This will ensure that SETCC and + // VSELECT are both split by the type legalizer. This is done to prevent the + // type legalizer from unrolling SETCC into scalar comparisons. + EVT SelectVT = N->getValueType(0); + EVT MaskVT = getSetCCResultType(SelectVT); + if (N0.getOpcode() == ISD::SETCC && N0.getValueType() != MaskVT) { + SDLoc MaskDL(N0); + + // Extend the mask to the desired value type. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + SDValue Mask = DAG.getNode(ExtendCode, MaskDL, MaskVT, N0); + + AddToWorkList(Mask.getNode()); + + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + + return DAG.getNode(ISD::VSELECT, DL, SelectVT, Mask, LHS, RHS); + } + return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 7b1d14dad0c..f1b06fcd983 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -492,14 +492,19 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue Cond = N->getOperand(0); CL = CH = Cond; if (Cond.getValueType().isVector()) { - assert(Cond.getValueType().getVectorElementType() == MVT::i1 && - "Condition legalized before result?"); - unsigned NumElements = Cond.getValueType().getVectorNumElements(); - EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); - CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getConstant(0, TLI.getVectorIdxTy())); - CH = 
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy())); + if (Cond.getOpcode() == ISD::SETCC) { + assert(Cond.getValueType() == getSetCCResultType(N->getValueType(0)) && + "Condition has not been prepared for split!"); + GetSplitVector(Cond, CL, CH); + } else { + EVT ETy = Cond.getValueType().getVectorElementType(); + unsigned NumElements = Cond.getValueType().getVectorNumElements(); + EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), ETy, NumElements / 2); + CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, + DAG.getConstant(0, TLI.getVectorIdxTy())); + CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, + DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy())); + } } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5dbef0f6fc3..18064fc6159 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1546,7 +1546,16 @@ void X86TargetLowering::resetOperationActions() { } EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) return MVT::i8; + if (!VT.isVector()) + return MVT::i8; + + const TargetMachine &TM = getTargetMachine(); + if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) + switch(VT.getVectorNumElements()) { + case 8: return MVT::v8i1; + case 16: return MVT::v16i1; + } + return VT.changeVectorElementTypeToInteger(); } diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll new file mode 100644 index 00000000000..f9e7c20ba4e --- /dev/null +++ b/test/CodeGen/X86/vec_split.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4 +; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1 +; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2 + +define <16 x i16> @split16(<16 x i16> %a, <16 x 
i16> %b, <16 x i8> %__mask) { +; SSE4-LABEL: split16: +; SSE4: pminuw +; SSE4: pminuw +; SSE4: ret +; AVX1-LABEL: split16: +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: ret +; AVX2-LABEL: split16: +; AVX2: vpminuw +; AVX2: ret + %1 = icmp ult <16 x i16> %a, %b + %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b + ret <16 x i16> %2 +} + +define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) { +; SSE4-LABEL: split32: +; SSE4: pminuw +; SSE4: pminuw +; SSE4: pminuw +; SSE4: pminuw +; SSE4: ret +; AVX1-LABEL: split32: +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: ret +; AVX2-LABEL: split32: +; AVX2: vpminuw +; AVX2: vpminuw +; AVX2: ret + %1 = icmp ult <32 x i16> %a, %b + %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b + ret <32 x i16> %2 +}