diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ae573284d7d..be3ecd7e531 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4066,11 +4066,11 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -// PromoteSplatv8v16 - All i16 and i8 vector types can't be used directly by +// PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by // a generic shuffle instruction because the target has no such instructions. // Generate shuffles which repeat i16 and i8 several times until they can be // represented by v4f32 and then be manipulated by target suported shuffles. -static SDValue PromoteSplatv8v16(SDValue V, SelectionDAG &DAG, int &EltNo) { +static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { EVT VT = V.getValueType(); int NumElems = VT.getVectorNumElements(); DebugLoc dl = V.getDebugLoc(); @@ -4162,8 +4162,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { } // Make this 128-bit vector duplicate i8 and i16 elements - if (NumElems > 4) - V1 = PromoteSplatv8v16(V1, DAG, EltNo); + EVT EltVT = SrcVT.getVectorElementType(); + if (NumElems > 4 && (EltVT == MVT::i8 || EltVT == MVT::i16)) + V1 = PromoteSplati8i16(V1, DAG, EltNo); // Recreate the 256-bit vector and place the same 128-bit vector // into the low and high part. 
This is necessary because we want @@ -6027,8 +6028,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, return PromoteVectorToScalarSplat(SVOp, DAG); // Handle splats by matching through known shuffle masks - if ((VT.is128BitVector() && NumElem <= 4) || - (VT.is256BitVector() && NumElem <= 8)) + if (VT.is128BitVector() && NumElem <= 4) return SDValue(); // All i16 and i8 vector types can't be used directly by a generic shuffle diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index 243ab9ba318..ca35b7f265c 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -51,8 +51,9 @@ entry: ; To: ; shuffle (vload ptr)), undef, <1, 1, 1, 1> ; CHECK: vmovaps -; CHECK-NEXT: vpextrd -define void @funcE() nounwind { +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $-1 +define <8 x float> @funcE() nounwind { allocas: %udx495 = alloca [18 x [18 x float]], align 32 br label %for_test505.preheader @@ -74,7 +75,7 @@ load.i1247: ; preds = %for_exit499 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ] - ret void + ret <8 x float> %load_broadcast12281250 } ; CHECK: vpshufd $0 @@ -87,3 +88,20 @@ define <8 x float> @funcF(i32* %ptr) nounwind { ret <8 x float> %tmp } +; CHECK: vinsertf128 $1 +; CHECK-NEXT: vpermilps $0 +define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <8 x float> %shuffle +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $85 +define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <8 x float> %shuffle +} +