X86: build the splat-load shuffle directly in the loaded vector type.

LowerAsSplatVectorLoad no longer bitcasts the widened load to a canonical
v4i32/v8i32 type before shuffling; the shuffle is created in NVT, the type
of the load itself. The folded-load PSHUFD/VPSHUFD patterns are updated to
match a v4f32 shuffle of a memopv4f32 operand without the bc_v4i32 wrapper.
Two tests are added to avx-shuffle.ll: test11 shuffles a value argument and
test12 shuffles a loaded vector; both expect "pshufd $27", i.e. the lane
order 3,2,1,0.

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fb0d1adaada..23347c28547 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4846,21 +4846,16 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
     int EltNo = (Offset - StartOffset) >> 2;
     int NumElems = VT.getVectorNumElements();
 
-    EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
     EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
     SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
                              LD->getPointerInfo().getWithOffset(StartOffset),
                              false, false, false, 0);
 
-    // Canonicalize it to a v4i32 or v8i32 shuffle.
     SmallVector<int, 8> Mask;
     for (int i = 0; i < NumElems; ++i)
       Mask.push_back(EltNo);
 
-    V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
-    return DAG.getNode(ISD::BITCAST, dl, NVT,
-                       DAG.getVectorShuffle(CanonVT, dl, V1,
-                                            DAG.getUNDEF(CanonVT),&Mask[0]));
+    return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
   }
 
   return SDValue();
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 43790cf3ea8..01b4dd670fa 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3998,8 +3998,7 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
                                 (i8 imm:$imm))),
             (VPSHUFDmi addr:$src1, imm:$imm)>;
-  def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
-                              (i8 imm:$imm))),
+  def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
             (VPSHUFDmi addr:$src1, imm:$imm)>;
   def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
             (VPSHUFDri VR128:$src1, imm:$imm)>;
@@ -4051,8 +4050,7 @@ let Predicates = [HasSSE2] in {
   def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
                                 (i8 imm:$imm))),
             (PSHUFDmi addr:$src1, imm:$imm)>;
-  def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
-                              (i8 imm:$imm))),
+  def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
             (PSHUFDmi addr:$src1, imm:$imm)>;
   def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
             (PSHUFDri VR128:$src1, imm:$imm)>;
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index ee038ce21ee..08b668c983b 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -96,3 +96,16 @@ define i32 @test10(<4 x i32> %a) nounwind {
   %r = extractelement <8 x i32> %b, i32 2
   ret i32 %r
 }
+
+define <4 x float> @test11(<4 x float> %a) nounwind {
+; CHECK: pshufd $27
+  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind {
+; CHECK: pshufd $27, (
+  %tmp0 = load <4 x float>* %a
+  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}