[AArch64] Fix a bug generating incorrect instruction when building small vector.

This bug is introduced by r211144. The element of operand may be smaller than the element of result, but previous commit can only handle the contrary condition. This commit is to handle this scenario and generate optimized codes like ZIP1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213830 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-04 10:30:01 +00:00 · 2014-07-24 02:05:42 +00:00 · 2014-07-24 02:05:42 +00:00 · 2daff76c05
commit 2daff76c05
parent 1bc34d71b7
2 changed files with 70 additions and 38 deletions
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -4180,9 +4180,22 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
  if (SourceVecs.size() > 2)
    return SDValue();

-  SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+  // Find out the smallest element size among result and two sources, and use
+  // it as element size to build the shuffle_vector.
+  EVT SmallestEltTy = VT.getVectorElementType();
+  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType();
+    if (SrcEltTy.bitsLT(SmallestEltTy)) {
+      SmallestEltTy = SrcEltTy;
+    }
+  }
+  unsigned ResMultiplier =
+      VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
  int VEXTOffsets[2] = { 0, 0 };
  int OffsetMultipliers[2] = { 1, 1 };
+  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
+  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};

  // This loop extracts the usage patterns of the source vectors
  // and prepares appropriate SDValues for a shuffle if possible.
@ -4190,15 +4203,15 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
    unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
    SDValue CurSource = SourceVecs[i];
    if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType()) {
-      // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
-      // Then bitcast it to the vector which holds asserted element type,
-      // and record the multiplier of element width between SourceVecs and
-      // Build_vector which is needed to extract the correct lanes later.
-      EVT CastVT =
-          EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
-                           SourceVecs[i].getValueSizeInBits() /
-                               VT.getVectorElementType().getSizeInBits());
+        ShuffleVT.getVectorElementType()) {
+      // As ShuffleVT holds smallest element size, it may hit here only if
+      // the element type of SourceVecs is bigger than that of ShuffleVT.
+      // Adjust the element size of SourceVecs to match ShuffleVT, and record
+      // the multipliers.
+      EVT CastVT = EVT::getVectorVT(
+          *DAG.getContext(), ShuffleVT.getVectorElementType(),
+          SourceVecs[i].getValueSizeInBits() /
+              ShuffleVT.getVectorElementType().getSizeInBits());

      CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
      OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
@ -4207,7 +4220,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
      MinElts[i] *= OffsetMultipliers[i];
    }

-    if (CurSource.getValueType() == VT) {
+    if (CurSource.getValueType() == ShuffleVT) {
      // No VEXT necessary
      ShuffleSrcs[i] = CurSource;
      VEXTOffsets[i] = 0;
@ -4215,8 +4228,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
    } else if (NumSrcElts < NumElts) {
      // We can pad out the smaller vector for free, so if it's part of a
      // shuffle...
-      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
-                                   DAG.getUNDEF(CurSource.getValueType()));
+      ShuffleSrcs[i] =
+          DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource,
+                      DAG.getUNDEF(CurSource.getValueType()));
      continue;
    }

@ -4233,50 +4247,61 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
    if (MinElts[i] >= NumElts) {
      // The extraction can just take the second half
      VEXTOffsets[i] = NumElts;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                   DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                   CurSource, DAG.getIntPtrConstant(NumElts));
    } else if (MaxElts[i] < NumElts) {
      // The extraction can just take the first half
      VEXTOffsets[i] = 0;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                   DAG.getIntPtrConstant(0));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                   CurSource, DAG.getIntPtrConstant(0));
    } else {
      // An actual VEXT is needed
      VEXTOffsets[i] = MinElts[i];
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                     DAG.getIntPtrConstant(0));
-      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                     DAG.getIntPtrConstant(NumElts));
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                     CurSource, DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                     CurSource, DAG.getIntPtrConstant(NumElts));
      unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
-      ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
-                                   DAG.getConstant(Imm, MVT::i32));
+      ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1,
+                                   VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
    }
  }

  SmallVector<int, 8> Mask;
+  unsigned VTEltSize = VT.getVectorElementType().getSizeInBits();

-  for (unsigned i = 0; i < NumElts; ++i) {
+  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
    SDValue Entry = Op.getOperand(i);
-    if (Entry.getOpcode() == ISD::UNDEF) {
-      Mask.push_back(-1);
-      continue;
+    int SourceNum = 1;
+    unsigned LanePartNum = 0;
+    int ExtractElt;
+    if (Entry.getOpcode() != ISD::UNDEF) {
+      // Check how many parts of source lane should be inserted.
+      SDValue ExtractVec = Entry.getOperand(0);
+      if (ExtractVec == SourceVecs[0])
+        SourceNum = 0;
+      ExtractElt = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+      unsigned ExtEltSize =
+          ExtractVec.getValueType().getVectorElementType().getSizeInBits();
+      unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize;
+      LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits();
    }

-    SDValue ExtractVec = Entry.getOperand(0);
-    int ExtractElt =
-        cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
-    if (ExtractVec == SourceVecs[0]) {
-      Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
-    } else {
-      Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
-                     VEXTOffsets[1]);
+    for (unsigned j = 0; j != ResMultiplier; ++j) {
+      if (j < LanePartNum)
+        Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] +
+                       NumElts * SourceNum - VEXTOffsets[SourceNum] + j);
+      else
+        Mask.push_back(-1);
    }
  }

  // Final check before we try to produce nonsense...
-  if (isShuffleMaskLegal(Mask, VT))
-    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
-                                &Mask[0]);
+  if (isShuffleMaskLegal(Mask, ShuffleVT)) {
+    SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0],
+                                           ShuffleSrcs[1], &Mask[0]);
+    return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+  }

  return SDValue();
 }
--- a/test/CodeGen/AArch64/neon-perm.ll
+++ b/test/CodeGen/AArch64/neon-perm.ll
@ -1387,6 +1387,13 @@ entry:
  ret <8 x i16> %shuffle.i
 }

+define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
+; CHECK-LABEL: test_vzip1_v4i8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i8> %lo
+}
+
 define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
 ; CHECK-LABEL: test_same_vzip2_s8:
 ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b