[x86] Explicitly lower to a blend early if it is trivial to do so for

v8f32 shuffles in the new vector shuffle lowering code. This is very cheap to do and makes it much clearer that anything more expensive but overlapping with this lowering should be selected afterward (for example, using AVX2's VPERMPS). However, no functionality is changed here, because without this code we would fall through to create no-op shuffles of each input and a blend. =]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218209 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-16 11:30:51 +00:00 · 2014-09-21 11:40:39 +00:00 · 2014-09-21 11:40:39 +00:00 · fdaf59e9b1
commit fdaf59e9b1
parent 29720a4bad
1 changed files with 5 additions and 0 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7238,6 +7238,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4f64:
+  case MVT::v8f32:
    return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
                       DAG.getConstant(BlendMask, MVT::i8));

@@ -9319,6 +9320,10 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
      isSingleInputShuffleMask(Mask))
    return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);

+  if (SDValue Blend =
+          lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, DAG))
+    return Blend;
+
  // Shuffle the input elements into the desired positions in V1 and V2 and
  // blend them together.
  int V1Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};