Fix pattern for memory form of PSHUFD for use with FP vectors to remove bitcast to an integer vector that normal code wouldn't have. Also remove bitcasts from code that turns splat vector loads into a shuffle as it was making the broken pattern necessary.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149232 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-16 00:33:10 +00:00 · 2012-01-30 07:50:31 +00:00 · 2012-01-30 07:50:31 +00:00 · cc30006391
commit cc30006391
parent 41b9920a13
3 changed files with 16 additions and 10 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4846,21 +4846,16 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
    int EltNo = (Offset - StartOffset) >> 2;
    int NumElems = VT.getVectorNumElements();

-    EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
    SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
                             LD->getPointerInfo().getWithOffset(StartOffset),
                             false, false, false, 0);

-    // Canonicalize it to a v4i32 or v8i32 shuffle.
    SmallVector<int, 8> Mask;
    for (int i = 0; i < NumElems; ++i)
      Mask.push_back(EltNo);

-    V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
-    return DAG.getNode(ISD::BITCAST, dl, NVT,
-                       DAG.getVectorShuffle(CanonVT, dl, V1,
-                                            DAG.getUNDEF(CanonVT),&Mask[0]));
+    return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
  }

  return SDValue();
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3998,8 +3998,7 @@ let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
                                   (i8 imm:$imm))),
            (VPSHUFDmi addr:$src1, imm:$imm)>;
-  def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
-                                   (i8 imm:$imm))),
+  def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
            (VPSHUFDmi addr:$src1, imm:$imm)>;
  def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
            (VPSHUFDri VR128:$src1, imm:$imm)>;
@@ -4051,8 +4050,7 @@ let Predicates = [HasSSE2] in {
  def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
                                   (i8 imm:$imm))),
            (PSHUFDmi addr:$src1, imm:$imm)>;
-  def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
-                                   (i8 imm:$imm))),
+  def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
            (PSHUFDmi addr:$src1, imm:$imm)>;
  def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
            (PSHUFDri VR128:$src1, imm:$imm)>;
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -96,3 +96,16 @@ define i32 @test10(<4 x i32> %a) nounwind {
  %r = extractelement <8 x i32> %b, i32 2
  ret i32 %r
 }
+
+define <4 x float> @test11(<4 x float> %a) nounwind  {
+; CHECK: pshufd $27
+  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind  {
+; CHECK: pshufd $27, (
+  %tmp0 = load <4 x float>* %a
+  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}