From 0a942dbb1e0f303191639498c35e742309f08a64 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 19 May 2010 01:08:17 +0000 Subject: [PATCH] Intrinsics which do a vector compare (results are all zero or all ones) are modeled as icmp / fcmp + sext. This is turned into a vsetcc by dag combine (yes, not a good long term solution). The targets can then isel the vsetcc to the appropriate instruction. The trouble arises when the result of a vector cmp + sext is then and'ed with all ones. Instcombine will turn it into a vector cmp + zext, dag combiner will miss turning it into a vsetcc and hell breaks loose after that. Teach dag combine to turn a vector cpm + zest into a vsetcc + and 1. This fixes rdar://7923010. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@104094 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 81 ++++++++++++++++++++++-- test/CodeGen/ARM/vcgt.ll | 13 ++++ 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bfa302cf095..347024f0ae4 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3570,7 +3570,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -3591,9 +3591,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()), NegOne, DAG.getConstant(0, VT)); - } - - + } // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3732,8 +3730,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } } - // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc if (N0.getOpcode() == ISD::SETCC) { + if (!LegalOperations && VT.isVector()) { + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + // Only do this before legalize for now. + EVT N0VT = N0.getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); + SmallVector OneOps(VT.getVectorNumElements(), + DAG.getConstant(1, EltVT)); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) { + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + } else { + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + } + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), @@ -3889,8 +3927,39 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } - // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc if (N0.getOpcode() == ISD::SETCC) { + // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == N0VT.getSizeInBits()) + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + else { + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } + } + + // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index 6b11ba5ce69..194093c8418 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -158,5 +158,18 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x i32> %tmp3 } +; rdar://7923010 +define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vcgt_zext: +;CHECK: vcgt.f32 q0 +;CHECK: vmov.i32 q1, #0x1 +;CHECK: vand q0, q0, q1 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 + %tmp4 = zext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone