AArch64: fix LowerCONCAT_VECTORS for new CodeGen.

The function was making too many assumptions about its input:

1. The NEON_VDUP optimisation was far too aggressive, assuming (I think) that the input would always be BUILD_VECTOR.
2. We were treating most unknown concats as legal (by returning Op rather than SDValue()). I think only concats of pairs of vectors are actually legal.

http://llvm.org/PR19094

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203450 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-28 23:38:59 +00:00 · 2014-03-10 09:34:07 +00:00 · 2014-03-10 09:34:07 +00:00 · 8ca089df49
commit 8ca089df49
parent 31da39e479
2 changed files with 32 additions and 10 deletions
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -2281,19 +2281,20 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
  // same operand and of type v1* using the DUP instruction.
  unsigned NumOps = Op->getNumOperands();
-  if (NumOps != 4 && NumOps != 8 && NumOps != 16)
+  if (NumOps == 2) {
+    assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
    return Op;
+  }
+
+  if (NumOps != 4 && NumOps != 8 && NumOps != 16)
+    return SDValue();

  // Must be a single value for VDUP.
-  bool isConstant = true;
  SDValue Op0 = Op.getOperand(0);
  for (unsigned i = 1; i < NumOps; ++i) {
    SDValue OpN = Op.getOperand(i);
    if (Op0 != OpN)
-      return Op;
-
-    if (!isa<ConstantSDNode>(OpN->getOperand(0)))
-      isConstant = false;
+      return SDValue();
  }

  // Verify the value type.
@ -2302,22 +2303,22 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  default: llvm_unreachable("Unexpected number of operands");
  case 4:
    if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
-      return Op;
+      return SDValue();
    break;
  case 8:
    if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
-      return Op;
+      return SDValue();
    break;
  case 16:
    if (EltVT != MVT::v1i8)
-      return Op;
+      return SDValue();
    break;
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  // VDUP produces better code for constants.
-  if (isConstant)
+  if (Op0->getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
  return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
                     DAG.getConstant(0, MVT::i64));
--- a/test/CodeGen/AArch64/concatvector-v8i8-bug.ll
+++ b/test/CodeGen/AArch64/concatvector-v8i8-bug.ll
@ -45,3 +45,24 @@ for.body130.us.us:                                ; preds = %for.body130.us.us,
  br label %for.body130.us.us
 }

+declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
+
+define <8 x i16> @test_splat(i32 %l) nounwind {
+; CHECK-LABEL: test_splat:
+; CHECK: ret
+  %lhs = insertelement <1 x i32> undef, i32 %l, i32 0
+  %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
+  %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %vec
+}
+
+
+define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind {
+; CHECK-LABEL: test_notsplat:
+; CHECK: ret
+entry:
+  %lhs = insertelement <1 x i32> undef, i32 %l, i32 0
+  %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
+  %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i16> %vec
+}