diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 54d824419fd..4b11f2b61f3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1498,6 +1498,7 @@ void X86TargetLowering::resetOperationActions() {
   }
 
   // We have target-specific dag combine patterns for the following nodes:
+  setTargetDAGCombine(ISD::CONCAT_VECTORS);
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::VSELECT);
@@ -16151,6 +16152,59 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// PerformConcatCombine - Rewrite (concat_vectors (bitcast X), undef) as
+/// (bitcast (scalar_to_vector X)) when the concat result is 128 bits wide,
+/// the bitcast result type is illegal, and the type of X is legal.
+/// Returns an empty SDValue when the pattern does not match.
+static SDValue PerformConcatCombine(SDNode *N, SelectionDAG &DAG,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const X86Subtarget *Subtarget) {
+  // Creating a v8i16 from a v4i16 argument and an undef runs into trouble in
+  // type legalization and ends up spilling to the stack. Avoid that by
+  // creating a vector first and bitcasting the result rather than
+  // bitcasting the source then creating the vector. Similar problems with
+  // v8i8.
+
+  // No point in doing this after legalize, so early exit for that.
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  // Only a two-operand concat producing a 128-bit value is handled.
+  EVT VT = N->getValueType(0);
+  if (VT.getSizeInBits() != 128 || N->getNumOperands() != 2)
+    return SDValue();
+
+  // Match (concat_vectors (bitcast Scalar), undef).
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op1->getOpcode() != ISD::UNDEF || Op0->getOpcode() != ISD::BITCAST)
+    return SDValue();
+
+  // Only rewrite when the bitcast result type is illegal but its source
+  // type is legal.
+  SDValue Scalar = Op0->getOperand(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.isTypeLegal(Op0->getValueType(0)) ||
+      !TLI.isTypeLegal(Scalar->getValueType(0)))
+    return SDValue();
+
+  // Any legal type here will be a simple value type.
+  MVT SVT = Scalar->getValueType(0).getSimpleVT();
+  // As a special case, bail out on MMX values.
+  if (SVT == MVT::x86mmx)
+    return SDValue();
+
+  // Do not create a SCALAR_TO_VECTOR whose type the target reports as
+  // illegal; bail out in that case.
+  EVT NVT = MVT::getVectorVT(SVT, 2);
+  if (!TLI.isTypeLegal(NVT))
+    return SDValue();
+
+  SDLoc dl = SDLoc(N);
+  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+  return DAG.getNode(ISD::BITCAST, dl, VT, Res);
+}
+
 /// PerformShuffleCombine - Performs several different shuffle combines.
 static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
@@ -19029,6 +19083,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERMILP:
   case X86ISD::VPERM2X128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
+  case ISD::CONCAT_VECTORS: return PerformConcatCombine(N, DAG, DCI, Subtarget);
   case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
   }
 
diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll
index b85b4c39ea8..f0e468f53cb 100644
--- a/test/CodeGen/X86/pmovext.ll
+++ b/test/CodeGen/X86/pmovext.ll
@@ -18,5 +18,28 @@ define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable
 }
 
 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
-
 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+; rdar://15245794
+
+define <4 x i32> @foo0(double %v.coerce) nounwind ssp {
+; CHECK-LABEL: foo0
+; CHECK: pmovzxwd %xmm0, %xmm0
+; CHECK-NEXT: ret
+  %tmp = bitcast double %v.coerce to <4 x i16>
+  %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind
+  ret <4 x i32> %tmp2
+}
+
+define <8 x i16> @foo1(double %v.coerce) nounwind ssp {
+; CHECK-LABEL: foo1
+; CHECK: pmovzxbw %xmm0, %xmm0
+; CHECK-NEXT: ret
+  %tmp = bitcast double %v.coerce to <8 x i8>
+  %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone