Fixed PR13938: the ARM backend was crashing because it couldn't select a VDUPLANE node with the vector input size different from the output size. This was bacause the BUILD_VECTOR lowering code didn't check that the size of the input vector was correct for using VDUPLANE.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165929 91177308-0d34-0410-b5e6-96231b3b80d8
2025-08-10 02:25:47 +00:00 · 2012-10-15 09:41:32 +00:00
parent 81ff90db44
commit bb1078ea13
2 changed files with 55 additions and 2 deletions
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -4230,9 +4230,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
      // If we are VDUPing a value that comes directly from a vector, that will
      // cause an unnecessary move to and from a GPR, where instead we could
      // just use VDUPLANE.
-      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+        // We need to create a new undef vector to use for the VDUPLANE if the
+        // size of the vector from which we get the value is different than the
+        // size of the vector that we need to create. We will insert the element
+        // such that the register coalescer will remove unnecessary copies.
+        if (VT != Value->getOperand(0).getValueType()) {
+          ConstantSDNode *constIndex;
+          constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
+          assert(constIndex && "The index is not a constant!");
+          unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+                             VT.getVectorNumElements();
+          N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+                 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+                        Value, DAG.getConstant(index, MVT::i32)),
+                           DAG.getConstant(index, MVT::i32));
+        } else {
          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                        Value->getOperand(0), Value->getOperand(1));
+        }
+      }
      else
        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);

--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -295,3 +295,39 @@ define <4 x i32> @tduplane(<4 x i32> %invec) {
  %4 = insertelement <4 x i32> %3, i32 255, i32 3
  ret <4 x i32> %4
 }
+
+define <2 x float> @check_f32(<4 x float> %v) nounwind {
+;CHECK: check_f32:
+;CHECK: vdup.32 {{.*}}, d{{..}}[1]
+  %x = extractelement <4 x float> %v, i32 3
+  %1 = insertelement  <2 x float> undef, float %x, i32 0
+  %2 = insertelement  <2 x float> %1, float %x, i32 1
+  ret <2 x float> %2
+}
+
+define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
+;CHECK: check_i32:
+;CHECK: vdup.32 {{.*}}, d{{..}}[1]
+  %x = extractelement <4 x i32> %v, i32 3
+  %1 = insertelement  <2 x i32> undef, i32 %x, i32 0
+  %2 = insertelement  <2 x i32> %1, i32 %x, i32 1
+  ret <2 x i32> %2
+}
+
+define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
+;CHECK: check_i16:
+;CHECK: vdup.16 {{.*}}, d{{..}}[3]
+  %x = extractelement <8 x i16> %v, i32 3
+  %1 = insertelement  <4 x i16> undef, i16 %x, i32 0
+  %2 = insertelement  <4 x i16> %1, i16 %x, i32 1
+  ret <4 x i16> %2
+}
+
+define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
+;CHECK: check_i8:
+;CHECK: vdup.8 {{.*}}, d{{..}}[3]
+  %x = extractelement <16 x i8> %v, i32 3
+  %1 = insertelement  <8  x i8> undef, i8 %x, i32 0
+  %2 = insertelement  <8  x i8> %1, i8 %x, i32 1
+  ret <8 x i8> %2
+}