Improve codegen for BUILD_VECTORs on ARM.

If we have a BUILD_VECTOR that is mostly a constant splat, it is often better to splat that constant then insertelement the non-constant lanes instead of insertelementing every lane from an undef base. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163304 91177308-0d34-0410-b5e6-96231b3b80d8
2025-08-13 08:26:02 +00:00 · 2012-09-06 09:55:02 +00:00
parent 486270aee6
commit ba8562af44
2 changed files with 90 additions and 10 deletions
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -4161,10 +4161,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
  }
  // Scan through the operands to see if only one value is used.
  //
  // As an optimisation, even if more than one value is used it may be more
  // profitable to splat with one value then change some lanes.
  //
  // Heuristically we decide to do this if the vector has a "dominant" value,
  // defined as splatted to more than half of the lanes.
  unsigned NumElts = VT.getVectorNumElements();
  bool isOnlyLowElement = true;
  bool usesOnlyOneValue = true;
  bool hasDominantValue = false;
  bool isConstant = true;
  // Map of the number of times a particular SDValue appears in the
  // element list.
  DenseMap<SDValue, int> ValueCounts;
  SDValue Value;
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
@@ -4175,13 +4186,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
      isConstant = false;
-    if (!Value.getNode())
+    ValueCounts.insert(std::make_pair(V, 0));
    int &Count = ValueCounts[V];
    // Is this value dominant? (takes up more than half of the lanes)
    if (++Count > (NumElts / 2)) {
      hasDominantValue = true;
      Value = V;
-    else if (V != Value)
+    }
      usesOnlyOneValue = false;
  }
  if (ValueCounts.size() != 1)
    usesOnlyOneValue = false;
  if (!Value.getNode() && ValueCounts.size() > 0)
    Value = ValueCounts.begin()->first;
-  if (!Value.getNode())
+  if (ValueCounts.size() == 0)
    return DAG.getUNDEF(VT);
  if (isOnlyLowElement)
@@ -4191,9 +4210,34 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
  // i32 and try again.
-  if (usesOnlyOneValue && EltSize <= 32) {
+  if (hasDominantValue && EltSize <= 32) {
-    if (!isConstant)
+    if (!isConstant) {
-      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+      SDValue N;
      // If we are VDUPing a value that comes directly from a vector, that will
      // cause an unnecessary move to and from a GPR, where instead we could
      // just use VDUPLANE.
      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
        N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                        Value->getOperand(0), Value->getOperand(1));
      else
        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
      if (!usesOnlyOneValue) {
        // The dominant value was splatted as 'N', but we now have to insert
        // all differing elements.
        for (unsigned I = 0; I < NumElts; ++I) {
          if (Op.getOperand(I) == Value)
            continue;
          SmallVector<SDValue, 3> Ops;
          Ops.push_back(N);
          Ops.push_back(Op.getOperand(I));
          Ops.push_back(DAG.getConstant(I, MVT::i32));
          N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
        }
      }
      return N;
    }
    if (VT.getVectorElementType().isFloatingPoint()) {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumElts; ++i)
@@ -4205,9 +4249,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
      if (Val.getNode())
        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
    }
-    SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+    if (usesOnlyOneValue) {
-    if (Val.getNode())
+      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
-      return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+      if (isConstant && Val.getNode())
        return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 
    }
  }
  // If all elements are constants and the case above didn't get hit, fall back
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -261,3 +261,37 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind {
  store <8 x i8> %2, <8 x i8>* %ptr, align 8
  ret void
 }
 define <4 x i32> @tdupi(i32 %x, i32 %y) {
 ;CHECK: tdupi
 ;CHECK: vdup.32
  %1 = insertelement <4 x i32> undef, i32 %x, i32 0
  %2 = insertelement <4 x i32> %1, i32 %x, i32 1
  %3 = insertelement <4 x i32> %2, i32 %x, i32 2
  %4 = insertelement <4 x i32> %3, i32 %y, i32 3
  ret <4 x i32> %4
 }
 define <4 x float> @tdupf(float %x, float %y) {
 ;CHECK: tdupf
 ;CHECK: vdup.32
  %1 = insertelement <4 x float> undef, float %x, i32 0
  %2 = insertelement <4 x float> %1, float %x, i32 1
  %3 = insertelement <4 x float> %2, float %x, i32 2
  %4 = insertelement <4 x float> %3, float %y, i32 3
  ret <4 x float> %4
 }
 ; This test checks that when splatting an element from a vector into another,
 ; the value isn't moved out to GPRs first.
 define <4 x i32> @tduplane(<4 x i32> %invec) {
 ;CHECK: tduplane
 ;CHECK-NOT: vmov {{.*}}, d16[1]
 ;CHECK: vdup.32 {{.*}}, d16[1]
  %in = extractelement <4 x i32> %invec, i32 1
  %1 = insertelement <4 x i32> undef, i32 %in, i32 0
  %2 = insertelement <4 x i32> %1, i32 %in, i32 1
  %3 = insertelement <4 x i32> %2, i32 %in, i32 2
  %4 = insertelement <4 x i32> %3, i32 255, i32 3
  ret <4 x i32> %4
 }