[DAGCombiner] Combine shuffles of BUILD_VECTOR and SCALAR_TO_VECTOR

This patch attempts to fold the shuffling of 'scalar source' inputs - BUILD_VECTOR and SCALAR_TO_VECTOR nodes - if the shuffle node is the only user. This folds away a lot of unnecessary shuffle nodes, and allows quite a bit of constant folding that was being missed.

Differential Revision: http://reviews.llvm.org/D8516

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234004 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Simon Pilgrim 2015-04-03 10:02:21 +00:00
parent c39f5dd0e2
commit 4e60da755a
11 changed files with 140 additions and 224 deletions

View File

@ -11980,6 +11980,43 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// The fold is only tried while Level is still before AfterLegalizeVectorOps
// and when the target reports the shuffle's result type VT as legal.
if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
// Collects one scalar operand per shuffle mask element, in mask order.
SmallVector<SDValue, 8> Ops;
for (int M : SVN->getMask()) {
// Default to undef; this is what negative mask entries end up as.
SDValue Op = DAG.getUNDEF(VT.getScalarType());
if (M >= 0) {
// Mask values below NumElts select from N0, the rest from N1; Idx is the
// element position within whichever input was selected.
int Idx = M % NumElts;
SDValue &S = (M < (int)NumElts ? N0 : N1);
// Both source kinds are only folded when the source has a single use.
if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
// Forward the scalar that feeds the selected element directly.
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
// Only element 0 takes the scalar operand; for any other index Op is
// left as the undef created above.
if (Idx == 0)
Op = S.getOperand(0);
} else {
// Operand can't be combined - bail out.
// Breaking here leaves Ops shorter than the mask, so the size check
// below fails and no BUILD_VECTOR is created.
break;
}
}
Ops.push_back(Op);
}
// Proceed only if every mask element produced an operand (no early bail out).
if (Ops.size() == VT.getVectorNumElements()) {
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
// For integer element types, widen SVT to the widest operand type seen.
if (SVT.isInteger())
for (SDValue &Op : Ops)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
// If a wider type was picked, convert every operand to it: through
// getZExtOrTrunc when the target reports that zero-extension as free,
// otherwise through getSExtOrTrunc.
if (SVT != VT.getScalarType())
for (SDValue &Op : Ops)
Op = TLI.isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
: DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
// Replace the shuffle with a single BUILD_VECTOR of the collected scalars.
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
}
}
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
// back to their original types.

View File

@ -1086,7 +1086,7 @@ define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
entry:
%c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
%d = insertelement <2 x i32> undef, i32 %c, i32 0

View File

@ -1,22 +1,8 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
; The mask:
; CHECK: lCPI0_0:
; CHECK: .byte 2 ; 0x2
; CHECK: .byte 255 ; 0xff
; CHECK: .byte 6 ; 0x6
; CHECK: .byte 255 ; 0xff
; The second vector is legalized to undef and the elements of the first vector
; are used instead.
; CHECK: .byte 2 ; 0x2
; CHECK: .byte 4 ; 0x4
; CHECK: .byte 6 ; 0x6
; CHECK: .byte 0 ; 0x0
; CHECK: test1
; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI0_0
; CHECK: movi.8h v[[REG1:[0-9]+]], #0x1, lsl #8
; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
; CHECK: movi d[[REG0:[0-9]+]], #0000000000000000
define <8 x i1> @test1() {
entry:
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@ -30,18 +16,16 @@ entry:
; CHECK: lCPI1_0:
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 255 ; 0xff
; CHECK: .byte 2 ; 0x2
; CHECK: .byte 255 ; 0xff
; CHECK: .byte 10 ; 0xa
; CHECK: .byte 12 ; 0xc
; CHECK: .byte 14 ; 0xe
; CHECK: .byte 7 ; 0x7
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 1 ; 0x1
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: test2
; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0@PAGEOFF]
; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_1@PAGE
; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG2]], lCPI1_1@PAGEOFF]
; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE
; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF]
define <8 x i1>@test2() {
bb:
%Shuff = shufflevector <8 x i1> zeroinitializer,
@ -51,28 +35,8 @@ bb:
ret <8 x i1> %Shuff
}
; CHECK: lCPI2_0:
; CHECK: .byte 2 ; 0x2
; CHECK: .byte 255 ; 0xff
; CHECK: .byte 6 ; 0x6
; CHECK: .byte 255 ; 0xff
; CHECK: .byte 10 ; 0xa
; CHECK: .byte 12 ; 0xc
; CHECK: .byte 14 ; 0xe
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 2 ; 0x2
; CHECK: .byte 255 ; 0xff
; CHECK: .byte 6 ; 0x6
; CHECK: .byte 255 ; 0xff
; CHECK: .byte 10 ; 0xa
; CHECK: .byte 12 ; 0xc
; CHECK: .byte 14 ; 0xe
; CHECK: .byte 0 ; 0x0
; CHECK: test3
; CHECK: adrp x[[REG3:[0-9]+]], lCPI2_0@PAGE
; CHECK: ldr q[[REG0:[0-9]+]], [x[[REG3]], lCPI2_0@PAGEOFF]
; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG3]], lCPI2_1@PAGEOFF]
; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
; CHECK: movi.4s v{{[0-9]+}}, #0x1
define <16 x i1> @test3(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
@ -81,29 +45,26 @@ bb:
i32 14, i32 0>
ret <16 x i1> %Shuff
}
; CHECK: lCPI3_1:
; CHECK: lCPI3_0:
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 1 ; 0x1
; CHECK: .byte 2 ; 0x2
; CHECK: .byte 18 ; 0x12
; CHECK: .byte 4 ; 0x4
; CHECK: .byte 5 ; 0x5
; CHECK: .byte 6 ; 0x6
; CHECK: .byte 7 ; 0x7
; CHECK: .byte 8 ; 0x8
; CHECK: .byte 31 ; 0x1f
; CHECK: .byte 10 ; 0xa
; CHECK: .byte 30 ; 0x1e
; CHECK: .byte 12 ; 0xc
; CHECK: .byte 13 ; 0xd
; CHECK: .byte 14 ; 0xe
; CHECK: .byte 15 ; 0xf
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: _test4:
; CHECK: ldr q[[REG1:[0-9]+]]
; CHECK: movi.2d v[[REG0:[0-9]+]], #0000000000000000
; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_1@PAGE
; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_1@PAGEOFF]
; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG0]], v[[REG1]] }, v[[REG2]]
; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
define <16 x i1> @test4(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> zeroinitializer,

View File

@ -9,58 +9,23 @@ define <16 x i8> @foo() nounwind ssp {
}
; CHECK: .LCPI0_0:
; CHECK: .byte 31
; CHECK: .byte 26
; CHECK: .byte 21
; CHECK: .byte 16
; CHECK: .byte 11
; CHECK: .byte 6
; CHECK: .byte 1
; CHECK: .byte 28
; CHECK: .byte 23
; CHECK: .byte 18
; CHECK: .byte 13
; CHECK: .byte 8
; CHECK: .byte 3
; CHECK: .byte 30
; CHECK: .byte 25
; CHECK: .byte 20
; CHECK: .LCPI0_1:
; CHECK: .byte 0
; CHECK: .byte 1
; CHECK: .byte 2
; CHECK: .byte 3
; CHECK: .byte 4
; CHECK: .byte 5
; CHECK: .byte 6
; CHECK: .byte 7
; CHECK: .byte 8
; CHECK: .byte 9
; CHECK: .byte 10
; CHECK: .byte 11
; CHECK: .byte 12
; CHECK: .byte 13
; CHECK: .byte 14
; CHECK: .byte 15
; CHECK: .LCPI0_2:
; CHECK: .byte 16
; CHECK: .byte 17
; CHECK: .byte 18
; CHECK: .byte 19
; CHECK: .byte 20
; CHECK: .byte 21
; CHECK: .byte 22
; CHECK: .byte 23
; CHECK: .byte 24
; CHECK: .byte 25
; CHECK: .byte 26
; CHECK: .byte 27
; CHECK: .byte 28
; CHECK: .byte 29
; CHECK: .byte 30
; CHECK: .byte 31
; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .byte 13
; CHECK: .byte 18
; CHECK: .byte 23
; CHECK: .byte 28
; CHECK: .byte 1
; CHECK: .byte 6
; CHECK: .byte 11
; CHECK: foo:
; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_2@toc@ha
; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_2@toc@l
; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_0@toc@ha
; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_0@toc@l
; CHECK: lvx [[REG3:[0-9]+]], 0, [[REG2]]
; CHECK: vperm {{[0-9]+}}, [[REG3]], {{[0-9]+}}, {{[0-9]+}}

View File

@ -75,8 +75,7 @@ define i64 @t5(i32 %a, i32 %b) nounwind readnone {
; CHECK-NEXT: movd
; CHECK-NEXT: movd
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; CHECK-NEXT: movd %xmm0, %rax
; CHECK-NEXT: movd %xmm1, %rax
; CHECK-NEXT: retq
%v0 = insertelement <2 x i32> undef, i32 %a, i32 0
%v1 = insertelement <2 x i32> %v0, i32 %b, i32 1

View File

@ -1026,29 +1026,24 @@ define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
}
; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 {
; X32-LABEL: insertps_pr20411:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
; X32-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT: movdqu %xmm1, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: insertps_pr20411:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; X64-NEXT: movdqu %xmm1, (%rdi)
; X64-LABEL: insertps_pr20411:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT: movdqu %xmm1, (%rdi)
; X64-NEXT: retq
%gather_load = shufflevector <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%shuffle109 = shufflevector <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; 4 5 6 7
%shuffle116 = shufflevector <8 x i32> %gather_load, <8 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; 3 x x x
%shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 4, i32 3, i32 undef, i32 undef> ; 3 7 x x
%ptrcast = bitcast i32* %RET to <4 x i32>*
store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
%shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
%ptrcast = bitcast i32* %RET to <4 x i32>*
store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
ret void
}

View File

@ -8,7 +8,7 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: shll $12, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT: movlpd %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp12 = shl i32 %a, 12

View File

@ -6,7 +6,7 @@ define x86_mmx @t0(i32 %A) nounwind {
; X86-32-LABEL: t0:
; X86-32: ## BB#0:
; X86-32: movd {{[0-9]+}}(%esp), %xmm0
; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; X86-32-NEXT: movlpd %xmm0, (%esp)
; X86-32-NEXT: movq (%esp), %mm0
; X86-32-NEXT: addl $12, %esp

View File

@ -17,7 +17,7 @@ define void @test1() {
define void @test2() {
;CHECK-LABEL: @test2
;CHECK: pshufd
;CHECK: pcmpeqd
store <1 x i64> < i64 -1 >, <1 x i64>* @M1
store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
ret void

View File

@ -634,28 +634,16 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
}
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@ -663,29 +651,18 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
}
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: shll $8, %edi
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pinsrw $2, %edi, %xmm0
; SSE-NEXT: retq
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: shll $8, %edi
; AVX-NEXT: vpxor %xmm0, %xmm0
; AVX-NEXT: vpinsrw $2, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -695,14 +672,16 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
; SSE-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; SSE: # BB#0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSE-NEXT: shll $8, %edi
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pinsrw $7, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; AVX-NEXT: shll $8, %edi
; AVX-NEXT: vpxor %xmm0, %xmm0
; AVX-NEXT: vpinsrw $7, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
@ -710,32 +689,18 @@ define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(
}
define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pslld $24, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pslld $24, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
; SSE-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pinsrw $1, %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpslld $24, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vpxor %xmm0, %xmm0
; AVX-NEXT: vpinsrw $1, %eax, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

View File

@ -1384,16 +1384,14 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pinsrw $1, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT: vpxor %xmm0, %xmm0
; AVX-NEXT: vpinsrw $1, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
@ -1403,16 +1401,14 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pinsrw $5, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; AVX-NEXT: vpxor %xmm0, %xmm0
; AVX-NEXT: vpinsrw $5, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
@ -1422,14 +1418,14 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
; SSE: # BB#0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pinsrw $7, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; AVX-NEXT: vpxor %xmm0, %xmm0
; AVX-NEXT: vpinsrw $7, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
@ -1439,16 +1435,14 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pinsrw $2, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX-NEXT: vpxor %xmm0, %xmm0
; AVX-NEXT: vpinsrw $2, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>