InstCombine: form shuffles from wider range of insert/extractelements

Sequences of insertelement/extractelement instructions are sometimes used to build vectors; this code tries to put them back together into shuffles, but could only produce completely uniform shuffle types (<N x T> from two <N x T> sources). This change should allow shuffles with different numbers of elements on the input and output sides as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203229 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-26 21:32:10 +00:00 · 2014-03-07 10:24:44 +00:00 · 2014-03-07 10:24:44 +00:00 · 69d2b2aa5a
commit 69d2b2aa5a
parent cd68cff830
3 changed files with 136 additions and 49 deletions
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@ -3321,6 +3321,11 @@ multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
                          (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
                   NoItinerary>;
  }
+
+  def : Pat<(v16i8 (int_aarch64_neon_vmull_p64
+                      (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))),
+                      (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))),
+            (!cast<Instruction>(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>;
 }

 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
@ -5878,12 +5883,21 @@ multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

+  def  : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
+               (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
+             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+
  //swapped operands
  def  : Pat<(ResTy (opnode
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpVTy FPRC:$Rn))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+
+  def  : Pat<(ResTy (opnode
+               (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)),
+               (OpVTy FPRC:$Rn))),
+             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
 }


@ -5975,6 +5989,13 @@ multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

+  def  : Pat<(ResTy (opnode
+               (ResTy ResFPRC:$Ra),
+               (ResTy (coreopnode (OpTy FPRC:$Rn),
+                 (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))),
+             (ResTy (INST (ResTy ResFPRC:$Ra),
+               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
+
  // swapped operands
  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
@ -5984,6 +6005,14 @@ multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
                 (OpTy FPRC:$Rn))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
+
+  def  : Pat<(ResTy (opnode
+               (ResTy ResFPRC:$Ra),
+               (ResTy (coreopnode
+                 (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)),
+                 (OpTy FPRC:$Rn))))),
+             (ResTy (INST (ResTy ResFPRC:$Ra),
+               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
 }

 // Patterns for Scalar Signed saturating
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@ -326,7 +326,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
 /// Otherwise, return false.
 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<Constant*> &Mask) {
-  assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = V->getType()->getVectorNumElements();

@ -367,10 +367,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
-      if (isa<ConstantInt>(EI->getOperand(1)) &&
-          EI->getOperand(0)->getType() == V->getType()) {
+      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+        unsigned NumLHSElts = LHS->getType()->getVectorNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
@ -386,7 +386,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] =
              ConstantInt::get(Type::getInt32Ty(V->getContext()),
-                               ExtractedIdx+NumElts);
+                               ExtractedIdx + NumLHSElts);
            }
            return true;
          }
@ -394,29 +394,36 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
      }
    }
  }
-  // TODO: Handle shufflevector here!

  return false;
 }

-/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
-/// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
-/// that computes V and the LHS value of the shuffle.
-static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
-                                     Value *&RHS) {
-  assert(V->getType()->isVectorTy() &&
-         (RHS == 0 || V->getType() == RHS->getType()) &&
-         "Invalid shuffle!");
+
+/// We are building a shuffle to create V, which is a sequence of insertelement,
+/// extractelement pairs. If PermittedRHS is set, then we must either use it or
+/// not rely on the second vector source. Return an std::pair containing the
+/// left and right vectors of the proposed shuffle (or 0), and set the Mask
+/// parameter as required.
+///
+/// Note: we intentionally don't try to fold earlier shuffles since they have
+/// often been chosen carefully to be efficiently implementable on the target.
+typedef std::pair<Value *, Value *> ShuffleOps;
+
+static ShuffleOps CollectShuffleElements(Value *V,
+                                         SmallVectorImpl<Constant *> &Mask,
+                                         Value *PermittedRHS) {
+  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();

  if (isa<UndefValue>(V)) {
    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
-    return V;
+    return std::make_pair(
+        PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
-    return V;
+    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
@ -426,51 +433,59 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
    Value *IdxOp    = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
-      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
-          EI->getOperand(0)->getType() == V->getType()) {
+      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
-        if (EI->getOperand(0) == RHS || RHS == 0) {
-          RHS = EI->getOperand(0);
-          Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
+          Value *RHS = EI->getOperand(0);
+          ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
+          assert(LR.second == 0 || LR.second == RHS);
+
+          if (LR.first->getType() != RHS->getType()) {
+            // We tried our best, but we can't find anything compatible with RHS
+            // further up the chain. Return a trivial shuffle.
+            for (unsigned i = 0; i < NumElts; ++i)
+              Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
+            return std::make_pair(V, nullptr);
+          }
+
+          unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
          Mask[InsertedIdx % NumElts] =
            ConstantInt::get(Type::getInt32Ty(V->getContext()),
-                             NumElts+ExtractedIdx);
-          return V;
+                             NumLHSElts+ExtractedIdx);
+          return std::make_pair(LR.first, RHS);
        }

-        if (VecOp == RHS) {
-          Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
-          // Update Mask to reflect that `ScalarOp' has been inserted at
-          // position `InsertedIdx' within the vector returned by IEI.
-          Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
-
-          // Everything but the extracted element is replaced with the RHS.
-          for (unsigned i = 0; i != NumElts; ++i) {
-            if (i != InsertedIdx)
-              Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
-                                         NumElts+i);
-          }
-          return V;
+        if (VecOp == PermittedRHS) {
+          // We've gone as far as we can: anything on the other side of the
+          // extractelement will already have been converted into a shuffle.
+          unsigned NumLHSElts =
+              EI->getOperand(0)->getType()->getVectorNumElements();
+          for (unsigned i = 0; i != NumElts; ++i)
+            Mask.push_back(ConstantInt::get(
+                Type::getInt32Ty(V->getContext()),
+                i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
+          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
-        if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
-          return EI->getOperand(0);
+        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
+            CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
+                                         Mask))
+          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }
-  // TODO: Handle shufflevector here!

  // Otherwise, can't do anything fancy.  Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
-  return V;
+  return std::make_pair(V, nullptr);
 }

 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
@ -485,17 +500,18 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
  // If the inserted element was extracted from some other vector, and if the
  // indexes are constant, try to turn this into a shufflevector operation.
  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
-    if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
-        EI->getOperand(0)->getType() == IE.getType()) {
-      unsigned NumVectorElts = IE.getType()->getNumElements();
+    if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
+      unsigned NumInsertVectorElts = IE.getType()->getNumElements();
+      unsigned NumExtractVectorElts =
+          EI->getOperand(0)->getType()->getVectorNumElements();
      unsigned ExtractedIdx =
        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
      unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

-      if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+      if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
        return ReplaceInstUsesWith(IE, VecOp);

-      if (InsertedIdx >= NumVectorElts)  // Out of range insert.
+      if (InsertedIdx >= NumInsertVectorElts)  // Out of range insert.
        return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));

      // If we are extracting a value from a vector, then inserting it right
@ -507,11 +523,16 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
      // (and any insertelements it points to), into one big shuffle.
      if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
        SmallVector<Constant*, 16> Mask;
-        Value *RHS = 0;
-        Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
-        if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+        ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
+
+        // The proposed shuffle may be trivial, in which case we shouldn't
+        // perform the combine.
+        if (LR.first != &IE && LR.second != &IE) {
          // We now have a shuffle of LHS, RHS, Mask.
-        return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
+          if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
+          return new ShuffleVectorInst(LR.first, LR.second,
+                                       ConstantVector::get(Mask));
+        }
      }
    }
  }
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@ -0,0 +1,37 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+define <1 x i8> @test1(<8 x i8> %in) {
+; CHECK-LABEL: @test1
+; CHECK: shufflevector <8 x i8> %in, <8 x i8> undef, <1 x i32> <i32 5>
+  %val = extractelement <8 x i8> %in, i32 5
+  %vec = insertelement <1 x i8> undef, i8 %val, i32 0
+  ret <1 x i8> %vec
+}
+
+define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
+; CHECK-LABEL: @test2
+; CHECK: shufflevector <8 x i16> %in2, <8 x i16> %in, <4 x i32> <i32 11, i32 9, i32 0, i32 10>
+  %elt0 = extractelement <8 x i16> %in, i32 3
+  %elt1 = extractelement <8 x i16> %in, i32 1
+  %elt2 = extractelement <8 x i16> %in2, i32 0
+  %elt3 = extractelement <8 x i16> %in, i32 2
+
+  %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0
+  %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1
+  %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2
+  %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3
+
+  ret <4 x i16> %vec.3
+}
+
+define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: @test_vcopyq_lane_p64
+; CHECK: extractelement
+; CHECK: insertelement
+; CHECK-NOT: shufflevector
+entry:
+  %elt = extractelement <1 x i64> %b, i32 0
+  %res = insertelement <2 x i64> %a, i64 %elt, i32 1
+  ret <2 x i64> %res
+}
+