; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2
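
; These tests verify that vector sign extensions are lowered to the compact
; vpmovsx* instructions on AVX, and to unpack/shift or scalar movsx sequences
; on the SSSE3 and SSE2 fallback paths checked below.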
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; AVX: sext_8i16_to_8i32
; AVX: vpmovsxwd

  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}

define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; AVX: sext_4i32_to_4i64
; AVX: vpmovsxdq

  %B = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %B
}

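; The load_sext tests below feed the extension from memory; on AVX the load
; should fold into a single vpmovsx* with a memory operand.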
; AVX: load_sext_test1
; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test1
; SSSE3: movq
; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSSE3: psrad $16
; SSSE3: ret

; SSE2: load_sext_test1
; SSE2: movq
; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSE2: psrad $16
; SSE2: ret
define <4 x i32> @load_sext_test1(<4 x i16>* %ptr) {
  %X = load <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i32>
  ret <4 x i32> %Y
}

; AVX: load_sext_test2
; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test2
; SSSE3: movd
; SSSE3: pshufb
; SSSE3: psrad $24
; SSSE3: ret

; SSE2: load_sext_test2
; SSE2: movl
; SSE2: psrad $24
; SSE2: ret
define <4 x i32> @load_sext_test2(<4 x i8>* %ptr) {
  %X = load <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i32>
  ret <4 x i32> %Y
}

; AVX: load_sext_test3
; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test3
; SSSE3: movsbq
; SSSE3: movsbq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test3
; SSE2: movsbq
; SSE2: movsbq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test3(<2 x i8>* %ptr) {
  %X = load <2 x i8>* %ptr
  %Y = sext <2 x i8> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test4
; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test4
; SSSE3: movswq
; SSSE3: movswq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test4
; SSE2: movswq
; SSE2: movswq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test4(<2 x i16>* %ptr) {
  %X = load <2 x i16>* %ptr
  %Y = sext <2 x i16> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test5
; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test5
; SSSE3: movslq
; SSSE3: movslq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test5
; SSE2: movslq
; SSE2: movslq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test5(<2 x i32>* %ptr) {
  %X = load <2 x i32>* %ptr
  %Y = sext <2 x i32> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test6
; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test6
; SSSE3: movq
; SSSE3: punpcklbw
; SSSE3: psraw $8
; SSSE3: ret

; SSE2: load_sext_test6
; SSE2: movq
; SSE2: punpcklbw
; SSE2: psraw $8
; SSE2: ret
define <8 x i16> @load_sext_test6(<8 x i8>* %ptr) {
  %X = load <8 x i8>* %ptr
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16> %Y
}

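; A <4 x i1> mask carries its value in the low bit of each dword, so it is
; sign-materialized with a shift-left/arithmetic-shift-right by 31 before the
; two dword-to-qword extension steps.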
; AVX: sext_4i1_to_4i64
; AVX: vpslld $31
; AVX: vpsrad $31
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
  %extmask = sext <4 x i1> %mask to <4 x i64>
  ret <4 x i64> %extmask
}

; AVX-LABEL: sext_16i8_to_16i16
; AVX: vpmovsxbw
; AVX: vpunpckhqdq
; AVX: vpmovsxbw
; AVX: ret
define <16 x i16> @sext_16i8_to_16i16(<16 x i8>* %ptr) {
  %X = load <16 x i8>* %ptr
  %Y = sext <16 x i8> %X to <16 x i16>
  ret <16 x i16> %Y
}

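; A <4 x i8> value lives in the low byte of each dword here, so the i8 sign
; bit is first replicated with a shift pair by 24 before the same
; dword-to-qword extension steps.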
; AVX: sext_4i8_to_4i64
; AVX: vpslld $24
; AVX: vpsrad $24
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
  %extmask = sext <4 x i8> %mask to <4 x i64>
  ret <4 x i64> %extmask
}

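; When the <4 x i8> source is loaded instead, the byte-to-dword step can be a
; plain vpmovsxbd and no shift pair should be emitted.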
; AVX: load_sext_4i8_to_4i64
; AVX: vpmovsxbd
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8>* %ptr) {
  %X = load <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i64>
  ret <4 x i64> %Y
}

; AVX: load_sext_4i16_to_4i64
; AVX: vpmovsxwd
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16>* %ptr) {
  %X = load <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i64>
  ret <4 x i64> %Y
}