[AVX] Improve insertion of i8 or i16 into low element of 256-bit zero vector

Without this patch, we split the 256-bit vector into halves and produced something like:
	movzwl	(%rdi), %eax
	vmovd	%eax, %xmm0
	vxorps	%xmm1, %xmm1, %xmm1
	vblendps	$15, %ymm0, %ymm1, %ymm0 ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]

Now, we eliminate the xor and blend because those zeros are free with the vmovd:
        movzwl  (%rdi), %eax
        vmovd   %eax, %xmm0

This should be the final fix needed to resolve PR22685:
https://llvm.org/bugs/show_bug.cgi?id=22685




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233941 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2015-04-02 20:21:52 +00:00
parent 19443c1bcb
commit 5b93ab6cde
3 changed files with 25 additions and 5 deletions

View File

@ -5679,14 +5679,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
// We can't directly insert an i8 or i16 into a vector, so zero extend
// it to i32 first.
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
if (Subtarget->hasAVX()) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
} else {
// Without AVX, we need to extend to a 128-bit vector and then
// insert into the 256-bit vector.
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
}
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);

View File

@ -3249,3 +3249,15 @@ define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_u
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
ret <16 x i16> %shuffle
}
define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
; ALL: # BB#0:
; ALL-NEXT: movzwl (%rdi), %eax
; ALL-NEXT: vmovd %eax, %xmm0
; ALL-NEXT: retq
%val = load i16, i16* %ptr
%i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
ret <16 x i16> %i0
}

View File

@ -656,8 +656,6 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: movl $15, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendd $15, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>