diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 940a9c90593..04de324eabf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -331,12 +331,24 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
          "Operand types must be vectors");
-
-  SDValue LHS = GetScalarizedVector(N->getOperand(0));
-  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT OpVT = LHS.getValueType();
   EVT NVT = N->getValueType(0).getVectorElementType();
   SDLoc DL(N);
 
+  // The result needs scalarizing, but it's not a given that the source does.
+  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+    LHS = GetScalarizedVector(LHS);
+    RHS = GetScalarizedVector(RHS);
+  } else {
+    EVT VT = OpVT.getVectorElementType();
+    LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+                      DAG.getConstant(0, TLI.getVectorIdxTy()));
+    RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+                      DAG.getConstant(0, TLI.getVectorIdxTy()));
+  }
+
   // Turn it into a scalar SETCC.
   SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
                             N->getOperand(2));
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 54277103902..2edb19281de 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -538,7 +538,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
   }
 
-  setTargetDAGCombine(ISD::SETCC);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::VSELECT);
 }
@@ -4284,32 +4283,6 @@ static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   return SDValue(N, 0);
 }
 
-// v1i1 setcc ->
-//     v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt))
-// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result.
-// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC.
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
-  EVT ResVT = N->getValueType(0);
-
-  if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||
-      ResVT.getVectorElementType() != MVT::i1)
-    return SDValue();
-
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  EVT CmpVT = LHS.getValueType();
-  LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
-                    CmpVT.getVectorElementType(), LHS,
-                    DAG.getConstant(0, MVT::i64));
-  RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
-                    CmpVT.getVectorElementType(), RHS,
-                    DAG.getConstant(0, MVT::i64));
-  SDValue SetCC =
-      DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,
-                   cast<CondCodeSDNode>(N->getOperand(2))->get());
-  return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);
-}
-
 // vselect (v1i1 setcc) ->
 //     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@@ -4378,7 +4351,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SRA:
   case ISD::SRL:
     return PerformShiftCombine(N, DCI, getSubtarget());
-  case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);
   case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
   case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
   case ISD::INTRINSIC_WO_CHAIN:
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp
index 255685232d1..63957abdfdd 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -434,6 +434,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
 
   setTargetDAGCombine(ISD::MUL);
 
+  setTargetDAGCombine(ISD::VSELECT);
+
   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
   MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -7227,6 +7229,36 @@ static SDValue performBRCONDCombine(SDNode *N,
   return SDValue();
 }
 
+// vselect (v1i1 setcc) ->
+//     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
+// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
+// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
+// such VSELECT.
+static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  EVT CCVT = N0.getValueType();
+
+  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
+      CCVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
+  EVT ResVT = N->getValueType(0);
+  EVT CmpVT = N0.getOperand(0).getValueType();
+  // Only combine when the result type is of the same size as the compared
+  // operands.
+  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
+    return SDValue();
+
+  SDValue IfTrue = N->getOperand(1);
+  SDValue IfFalse = N->getOperand(2);
+  SDValue SetCC =
+      DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
+                   N0.getOperand(0), N0.getOperand(1),
+                   cast<CondCodeSDNode>(N0.getOperand(2))->get());
+  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
+                     IfTrue, IfFalse);
+}
+
 SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -7255,6 +7287,8 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
     return performBitcastCombine(N, DCI, DAG);
   case ISD::CONCAT_VECTORS:
     return performConcatVectorsCombine(N, DCI, DAG);
+  case ISD::VSELECT:
+    return performVSelectCombine(N, DCI.DAG);
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
   case ARM64ISD::BRCOND:
diff --git a/test/CodeGen/ARM64/neon-v1i1-setcc.ll b/test/CodeGen/ARM64/neon-v1i1-setcc.ll
new file mode 100644
index 00000000000..a7e59fbc002
--- /dev/null
+++ b/test/CodeGen/ARM64/neon-v1i1-setcc.ll
@@ -0,0 +1,65 @@
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s
+
+; This is the analogue of AArch64's file of the same name. It's mostly testing
+; some form of correct lowering occurs, the tests are a little artificial but I
+; strongly suspect there's room for improved CodeGen (FIXME).
+
+define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_0:
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: csinc
+  %1 = icmp sge <1 x i64> %v1, %v2
+  %2 = extractelement <1 x i1> %1, i32 0
+  %vget_lane = sext i1 %2 to i64
+  ret i64 %vget_lane
+}
+
+define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_1:
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+  %1 = fcmp oeq <1 x double> %v1, %v2
+  %2 = extractelement <1 x i1> %1, i32 0
+  %vget_lane = sext i1 %2 to i64
+  ret i64 %vget_lane
+}
+
+define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_0:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %1 = icmp eq <1 x i64> %v1, %v2
+  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+  ret <1 x i64> %res
+}
+
+define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %1 = fcmp oeq <1 x double> %v1, %v2
+  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+  ret <1 x i64> %res
+}
+
+define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
+; CHECK-LABEL: test_select_v1i1_2:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %1 = icmp eq <1 x i64> %v1, %v2
+  %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
+  ret <1 x double> %res
+}
+
+define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_br_extr_cmp:
+; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
+  %1 = icmp eq <1 x i64> %v1, %v2
+  %2 = extractelement <1 x i1> %1, i32 0
+  br i1 %2, label %if.end, label %if.then
+
+if.then:
+  ret i32 0;
+
+if.end:
+  ret i32 1;
+}