mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-07 11:33:44 +00:00
ec35069525
The combine that forms extloads used to be disabled on vector types, because "None of the supported targets knows how to perform load and sign extend on vectors in one instruction." That's not entirely true, since at least SSE4.1 X86 knows how to do those sextloads/zextloads (with PMOVS/ZX). But there are several aspects to getting this right. First, vector extloads are controlled by a profitability callback. For instance, on ARM, several instructions have folded extload forms, so it's not always beneficial to create an extload node (and trying to match extloads is a whole 'nother can of worms). The interesting optimization enables folding of s/zextloads to illegal (splittable) vector types, expanding them into smaller legal extloads. It's not ideal (it introduces some legalization-like behavior in the combine) but it's better than the obvious alternative: form illegal extloads, and later try to split them up. If you do that, you might generate extloads that can't be split up, but have a valid ext+load expansion. At vector-op legalization time, it's too late to generate this kind of code, so you end up forced to scalarize. It's better to just avoid creating egregiously illegal nodes. This optimization is enabled unconditionally on X86. Note that the splitting combine is happy with "custom" extloads. As is, this bypasses the actual custom lowering, and just unrolls the extload. But from what I've seen, this is still much better than the current custom lowering, which does some kind of unrolling at the end anyway (see for instance load_sext_4i8_to_4i64 on SSE2, and the added FIXME). Also note that the existing combine that forms extloads is now also enabled on legal vectors. This doesn't have a big effect on X86 (because sext+load is usually combined to sext_inreg+aextload). On ARM it fires on some rare occasions; that's for a separate commit. 
Differential Revision: http://reviews.llvm.org/D6904
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228325 91177308-0d34-0410-b5e6-96231b3b80d8
343 lines
13 KiB
LLVM
343 lines
13 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
;
; The file is compiled once per vector feature level (sse2, ssse3, sse4.1,
; avx, avx2). Every invocation enables a shared family prefix plus a
; level-specific prefix; the expectations below are written against the
; level-specific prefixes.

; Zero-extend an <8 x i16> register argument to <8 x i32>.
; Per the expectations below, the SSE runs leave the 256-bit result in
; xmm0 and xmm1, while the AVX runs leave it in ymm0.
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pand %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE41-NEXT: pand %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd %xmm0, %ymm0
; AVX2-NEXT: retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}

; Zero-extend a <4 x i32> register argument to <4 x i64>.
; Per the expectations below, the SSE runs leave the 256-bit result in
; xmm0 and xmm1, while the AVX runs leave it in ymm0.
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
; SSSE3-NEXT: pand %xmm3, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; SSSE3-NEXT: pand %xmm3, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
; SSE41-NEXT: pand %xmm3, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpmovzxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq %xmm0, %ymm0
; AVX2-NEXT: retq
entry:
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}

; Zero-extend an <8 x i8> register argument to <8 x i32>.
; In every configuration the expected code widens the 16-bit lanes of xmm0
; and then applies a mask; the SSE runs show that mask as 255 per 32-bit
; lane, while the AVX runs load it from a RIP-relative constant.
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; SSE2-LABEL: zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pand %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE41-NEXT: pand %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwd %xmm0, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd %xmm0, %ymm0
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %t = zext <8 x i8> %z to <8 x i32>
  ret <8 x i32> %t
}

; PR17654
; Zero-extend a <16 x i8> register argument to <16 x i16>.
; Per the expectations below, the SSE runs leave the 256-bit result in
; xmm0 and xmm1, while the AVX runs leave it in ymm0.
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSSE3-NEXT: pand %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE41-NEXT: pand %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw %xmm0, %ymm0
; AVX2-NEXT: retq
entry:
  %t = zext <16 x i8> %z to <16 x i16>
  ret <16 x i16> %t
}

; Load a <16 x i8> vector through %ptr and zero-extend it to <16 x i16>.
; Per the expectations below, the sse2/ssse3 runs do a full 16-byte load
; followed by unpack and mask; the sse4.1 and avx runs fold the load into
; two pmovzxbw instructions (offsets 0 and 8); the avx2 run uses a single
; vpmovzxbw from memory into ymm0.
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklbw %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: punpckhbw %xmm1, %xmm1 # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: punpckhbw %xmm1, %xmm1 # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSSE3-NEXT: pand %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw (%rdi), %xmm0
; SSE41-NEXT: pmovzxbw 8(%rdi), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbw (%rdi), %xmm0
; AVX1-NEXT: vpmovzxbw 8(%rdi), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw (%rdi), %ymm0
; AVX2-NEXT: retq
entry:
  %X = load <16 x i8>* %ptr
  %Y = zext <16 x i8> %X to <16 x i16>
  ret <16 x i16> %Y
}

; Load an <8 x i16> vector through %ptr and zero-extend it to <8 x i32>.
; Per the expectations below, the sse2/ssse3 runs do a full 16-byte load
; followed by unpack and mask; the sse4.1 and avx runs fold the load into
; two pmovzxwd instructions (offsets 0 and 8); the avx2 run uses a single
; vpmovzxwd from memory into ymm0.
define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: punpckhwd %xmm1, %xmm1 # xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: punpckhwd %xmm1, %xmm1 # xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pand %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd (%rdi), %xmm0
; SSE41-NEXT: pmovzxwd 8(%rdi), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwd (%rdi), %xmm0
; AVX1-NEXT: vpmovzxwd 8(%rdi), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd (%rdi), %ymm0
; AVX2-NEXT: retq
entry:
  %X = load <8 x i16>* %ptr
  %Y = zext <8 x i16> %X to <8 x i32>
  ret <8 x i32>%Y
}

; Load a <4 x i32> vector through %ptr and zero-extend it to <4 x i64>.
; Per the expectations below, the sse2/ssse3 runs do a full 16-byte load
; followed by pshufd and mask; the sse4.1 and avx runs fold the load into
; two pmovzxdq instructions (offsets 0 and 8); the avx2 run uses a single
; vpmovzxdq from memory into ymm0.
define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pshufd $212, %xmm1, %xmm0 # xmm0 = xmm1[0,1,1,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd $250, %xmm1, %xmm1 # xmm1 = xmm1[2,2,3,3]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pshufd $212, %xmm1, %xmm0 # xmm0 = xmm1[0,1,1,3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufd $250, %xmm1, %xmm1 # xmm1 = xmm1[2,2,3,3]
; SSSE3-NEXT: pand %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq (%rdi), %xmm0
; SSE41-NEXT: pmovzxdq 8(%rdi), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxdq (%rdi), %xmm0
; AVX1-NEXT: vpmovzxdq 8(%rdi), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq (%rdi), %ymm0
; AVX2-NEXT: retq
entry:
  %X = load <4 x i32>* %ptr
  %Y = zext <4 x i32> %X to <4 x i64>
  ret <4 x i64>%Y
}