[DAGCombiner] Add support for TRUNCATE + FP_EXTEND vector constant folding

This patch adds supports for the vector constant folding of TRUNCATE and FP_EXTEND instructions and tidies up the SINT_TO_FP and UINT_TO_FP instructions to match. It also moves the vector constant folding for the FNEG and FABS instructions to use the DAG.getNode() functionality like the other unary instructions. Differential Revision: http://reviews.llvm.org/D8593 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233224 91177308-0d34-0410-b5e6-96231b3b80d8
2024-07-21 02:29:22 +00:00 · 2015-03-25 22:30:31 +00:00 · 2015-03-25 22:30:31 +00:00 · eb32048f80
commit eb32048f80
parent 00aafa5d73
5 changed files with 83 additions and 55 deletions
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@ -94,6 +94,10 @@ namespace ISD {
  /// all ConstantSDNode or undef.
  bool isBuildVectorOfConstantSDNodes(const SDNode *N);

+  /// \brief Return true if the specified node is a BUILD_VECTOR node of
+  /// all ConstantFPSDNode or undef.
+  bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
+
  /// Return true if the specified node is a
  /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
  /// element is not an undef.
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -251,7 +251,6 @@ namespace {
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
-    SDValue SimplifyVUnaryOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
@ -716,6 +715,22 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
  return nullptr;
 }

+static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
+  if (isa<ConstantSDNode>(N))
+    return N.getNode();
+  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+    return N.getNode();
+  return nullptr;
+}
+
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
+  if (isa<ConstantFPSDNode>(N))
+    return N.getNode();
+  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+    return N.getNode();
+  return nullptr;
+}
+
 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
 // int.
 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
@ -6557,7 +6572,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
-  if (isa<ConstantSDNode>(N0))
+  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
@ -7947,8 +7962,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  if (N0C &&
+  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@ -8000,8 +8014,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  if (N0C &&
+  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@ -8159,7 +8172,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {

 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
@ -8168,7 +8180,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
-  if (N0CFP)
+  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
@ -8243,14 +8255,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

-  if (VT.isVector()) {
-    SDValue FoldedVOp = SimplifyVUnaryOp(N);
-    if (FoldedVOp.getNode()) return FoldedVOp;
-  }
-
  // Constant fold FNEG.
-  if (isa<ConstantFPSDNode>(N0))
-    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
+  if (isConstantFPBuildVectorOrConstantFP(N0))
+    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
@ -8345,13 +8352,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

-  if (VT.isVector()) {
-    SDValue FoldedVOp = SimplifyVUnaryOp(N);
-    if (FoldedVOp.getNode()) return FoldedVOp;
-  }
-
  // fold (fabs c1) -> fabs(c1)
-  if (isa<ConstantFPSDNode>(N0))
+  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
@ -12401,38 +12403,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  return SDValue();
 }

-/// Visit a binary vector operation, like FABS/FNEG.
-SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
-  assert(N->getValueType(0).isVector() &&
-         "SimplifyVUnaryOp only works on vectors!");
-
-  SDValue N0 = N->getOperand(0);
-
-  if (N0.getOpcode() != ISD::BUILD_VECTOR)
-    return SDValue();
-
-  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
-  SmallVector<SDValue, 8> Ops;
-  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
-    SDValue Op = N0.getOperand(i);
-    if (Op.getOpcode() != ISD::UNDEF &&
-        Op.getOpcode() != ISD::ConstantFP)
-      break;
-    EVT EltVT = Op.getValueType();
-    SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
-    if (FoldOp.getOpcode() != ISD::UNDEF &&
-        FoldOp.getOpcode() != ISD::ConstantFP)
-      break;
-    Ops.push_back(FoldOp);
-    AddToWorklist(FoldOp.getNode());
-  }
-
-  if (Ops.size() != N0.getNumOperands())
-    return SDValue();
-
-  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
-}
-
 SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2){
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -196,6 +196,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
  return true;
 }

+/// \brief Return true if the specified node is a BUILD_VECTOR node of
+/// all ConstantFPSDNode or undef.
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Op = N->getOperand(i);
+    if (Op.getOpcode() == ISD::UNDEF)
+      continue;
+    if (!isa<ConstantFPSDNode>(Op))
+      return false;
+  }
+  return true;
+}
+
 /// isScalarToVector - Return true if the specified node is a
 /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
 /// element is not an undef.
@ -2827,7 +2843,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
    }
  }

-  // Constant fold unary operations with a vector integer operand.
+  // Constant fold unary operations with a vector integer or float operand.
  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
    if (BV->isConstant()) {
      switch (Opcode) {
@ -2835,6 +2851,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
        // FIXME: Entirely reasonable to perform folding of other unary
        // operations here as the need arises.
        break;
+      case ISD::FNEG:
+      case ISD::FABS:
+      case ISD::FP_EXTEND:
+      case ISD::TRUNCATE:
      case ISD::UINT_TO_FP:
      case ISD::SINT_TO_FP: {
        // Let the above scalar folding handle the folding of each element.
@ -2842,9 +2862,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
        for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
          SDValue OpN = BV->getOperand(i);
          OpN = getNode(Opcode, DL, VT.getVectorElementType(), OpN);
+          if (OpN.getOpcode() != ISD::UNDEF &&
+              OpN.getOpcode() != ISD::Constant &&
+              OpN.getOpcode() != ISD::ConstantFP)
+            break;
          Ops.push_back(OpN);
        }
-        return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+        if (Ops.size() == VT.getVectorNumElements())
+          return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
      }
      }
    }
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@ -42,3 +42,15 @@ entry:
  store <8 x double> %1, <8 x double>* %out, align 1
  ret void
 }
+
+define <2 x double> @fpext_fromconst() {
+; CHECK-LABEL: fpext_fromconst:
+; AVX-LABEL: fpext_fromconst:
+entry:
+; CHECK: movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+; AVX: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+  %0  = insertelement <2 x float> undef, float 1.0, i32 0
+  %1  = insertelement <2 x float> %0, float -2.0, i32 1
+  %2  = fpext <2 x float> %1 to <2 x double>
+  ret <2 x double> %2
+}
--- a/test/CodeGen/X86/vector-trunc.ll
+++ b/test/CodeGen/X86/vector-trunc.ll
@ -221,3 +221,20 @@ entry:
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
 }
+
+define <16 x i8> @trunc16i64_const() {
+; SSE-LABEL:  trunc16i64_const
+; SSE:        # BB#0: # %entry
+; SSE-NEXT:   xorps %xmm0, %xmm0
+; SSE-NEXT:   retq
+;
+; AVX-LABEL:  trunc16i64_const
+; AVX:        # BB#0: # %entry
+; AVX-NEXT:   vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:   retq
+
+entry:
+  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
+  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
+  ret <16 x i8> %1
+}