diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0ed30f5a6f0..63c7ab8fef6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7468,9 +7468,8 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
                      DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
 }
 
-static SDValue
-NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
-                       SelectionDAG &DAG) {
+static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+                                      SelectionDAG &DAG) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
@@ -7495,33 +7494,43 @@ NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
 
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
-  if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
-      VT == MVT::v16i16 || VT == MVT::v32i8) {
+  if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 ||
+      VT == MVT::v32i8) {
     SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
     if (NewOp.getNode())
       return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
-  } else if ((VT == MVT::v4i32 ||
-             (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
     // FIXME: Figure out a cleaner way to do this.
-    // Try to make use of movq to zero out the top part.
     if (ISD::isBuildVectorAllZeros(V2.getNode())) {
       SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
       if (NewOp.getNode()) {
         MVT NewVT = NewOp.getSimpleValueType();
         if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
                                NewVT, true, false))
-          return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
-                              DAG, Subtarget, dl);
+          return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget,
+                              dl);
       }
     } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
       SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
       if (NewOp.getNode()) {
         MVT NewVT = NewOp.getSimpleValueType();
         if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
-          return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
-                              DAG, Subtarget, dl);
+          return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget,
+                              dl);
       }
     }
+  } else if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Subtarget->hasSSE2()) {
+    // Emit movq and vmovq to copy an i64 or f64 to a vector and zero the
+    // other bits.
+    if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+      MVT NewVT = SVOp->getSimpleValueType(0);
+      if (isCommutedMOVLMask(SVOp->getMask(), NewVT, true, false))
+        return getVZextMovL(VT, NewVT, SVOp->getOperand(0), DAG, Subtarget, dl);
+    } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+      MVT NewVT = SVOp->getSimpleValueType(0);
+      if (isMOVLMask(SVOp->getMask(), NewVT))
+        return getVZextMovL(VT, NewVT, SVOp->getOperand(1), DAG, Subtarget, dl);
+    }
   }
   return SDValue();
 }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 02aa617c56c..e472042b2ce 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -306,3 +306,11 @@ define void @test20() {
   store <3 x double> %a1, <3 x double>* undef, align 1
   ret void
 }
+
+define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
+; CHECK-LABEL: test_insert_64_zext
+; CHECK-NOT: xor
+; CHECK: vmovq
+  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %1
+}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 628dba0b101..0a17eaca411 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -221,3 +221,11 @@ entry:
   %double2float.i = fptrunc <4 x double> %0 to <4 x float>
   ret <4 x float> %double2float.i
 }
+
+define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
+; CHECK-LABEL: test_insert_64_zext
+; CHECK-NOT: xor
+; CHECK: movq
+  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %1
+}