[x86] Tweak the ordering of unpack matching vs. element insertion, and

don't try to do element insertion for non-zero-index floating point vectors.

We don't have any useful patterns or lowering for element insertion into high elements of a floating point vector, and the generic shuffle lowering will end up being better -- namely it will fall back to unpck.

But we should try to handle other forms of element insertion before matching unpck patterns. While this doesn't matter much right now, I'm working on a patch that makes unpck matching much more powerful, and that patch will break without this re-ordering.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229306 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 17:39:16 +00:00 · 2015-02-15 12:01:14 +00:00 · 2015-02-15 12:01:14 +00:00 · 23b34c287f
commit 23b34c287f
parent 19cd7ccdf1
1 changed files with 21 additions and 17 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8242,6 +8242,10 @@ static SDValue lowerVectorShuffleAsElementInsertion(
                       ExtVT, V1, V2);
  }

+  // This lowering only works for the low element with floating point vectors.
+  if (VT.isFloatingPoint() && V2Index != 0)
+    return SDValue();
+
  V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
  if (ExtVT != VT)
    V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
@@ -8464,12 +8468,6 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
  assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
  assert(Mask[1] >= 2 && "Non-canonicalized blend!");

-  // Use dedicated unpack instructions for masks that match their pattern.
-  if (isShuffleEquivalent(Mask, 0, 2))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
-  if (isShuffleEquivalent(Mask, 1, 3))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
-
  // If we have a single input, insert that into V1 if we can do so cheaply.
  if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
    if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
@@ -8500,6 +8498,12 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                  Subtarget, DAG))
      return Blend;

+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (isShuffleEquivalent(Mask, 0, 2))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+  if (isShuffleEquivalent(Mask, 1, 3))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
  unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
  return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
                     DAG.getConstant(SHUFPDMask, MVT::i8));
@@ -8561,17 +8565,17 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
      return Insertion;
  }

+  if (Subtarget->hasSSE41())
+    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
+                                                  Subtarget, DAG))
+      return Blend;
+
  // Use dedicated unpack instructions for masks that match their pattern.
  if (isShuffleEquivalent(Mask, 0, 2))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
  if (isShuffleEquivalent(Mask, 1, 3))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);

-  if (Subtarget->hasSSE41())
-    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
-                                                  Subtarget, DAG))
-      return Blend;
-
  // Try to use byte rotation instructions.
  // Its more profitable for pre-SSSE3 to use shuffles/unpacks.
  if (Subtarget->hasSSSE3())
@@ -8743,12 +8747,6 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                       getV4X86ShuffleImm8ForMask(Mask, DAG));
  }

-  // Use dedicated unpack instructions for masks that match their pattern.
-  if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
-  if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
-
  // There are special ways we can lower some single-element blends. However, we
  // have custom ways we can lower more complex single-element blends below that
  // we defer to if both this and BLENDPS fail to match, so restrict this to
@@ -8774,6 +8772,12 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
        return BlendPerm;
  }

+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+  if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+
  // Otherwise fall back to a SHUFPS lowering strategy.
  return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
 }