diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 966aec0df96..de81949e0cc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5679,14 +5679,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } + // We can't directly insert an i8 or i16 into a vector, so zero extend + // it to i32 first. if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); if (VT.is256BitVector()) { - SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); - Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + if (Subtarget->hasAVX()) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); + } else { + // Without AVX, we need to extend to a 128-bit vector and then + // insert into the 256-bit vector. + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); + SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); + Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + } } else { assert(VT.is128BitVector() && "Expected an SSE value type!"); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll index aad37022d27..df4994da693 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -3249,3 +3249,15 @@ define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_u %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle } + +define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) { +; ALL-LABEL: insert_v16i16_0elt_into_zero_vector: +; ALL: # BB#0: +; ALL-NEXT: movzwl (%rdi), %eax +; ALL-NEXT: vmovd %eax, %xmm0 +; ALL-NEXT: retq + %val = load i16, i16* %ptr + %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0 + ret <16 x i16> %i0 +} + diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll index f9f4b96be3c..a0f43de7563 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -656,8 +656,6 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] ; AVX2-NEXT: movl $15, %eax ; AVX2-NEXT: vmovd %eax, %xmm1 -; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2 -; AVX2-NEXT: vpblendd $15, %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>