Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2024-12-15 04:30:12 +00:00
Commit 0c8607ba6a
The optimization handles esoteric cases but adds a lot of complexity, both to the X86 backend and to other backends. It disables an important canonicalization of chains of SEXT nodes and makes SEXT and ZEXT asymmetrical. Disabling the canonicalization of consecutive SEXT nodes into a single node also disables other DAG optimizations that assume there is only one SEXT node; the AVX mask optimization is one example. Additionally, this optimization does not update the cost model.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172968 91177308-0d34-0410-b5e6-96231b3b80d8
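The canonicalization the message refers to folds a chain of sign extensions into a single SEXT. A minimal LLVM IR sketch of the pattern (a hypothetical function for illustration, not part of the reverted patch or of the test below):

define i64 @sext_chain(i8 %x) {
  ; Two consecutive sign extensions...
  %w = sext i8 %x to i32
  %r = sext i32 %w to i64
  ; ...are normally combined into a single node, as if written:
  ;   %r = sext i8 %x to i64
  ret i64 %r
}

Later combines, such as the AVX mask optimization named above, match only the single-SEXT form, which is why blocking the fold also blocks them.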
145 lines · 2.9 KiB · LLVM · Executable File
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2
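
; Vector sign-extension (sext) lowering, checked at three ISA levels:
; AVX (corei7-avx), SSSE3 (core2), and SSE2 (pentium4).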

define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; AVX: sext_8i16_to_8i32
; AVX: vpmovsxwd

  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}

define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; AVX: sext_4i32_to_4i64
; AVX: vpmovsxdq

  %B = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %B
}
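
; The load_sext tests check that a sext of a loaded vector folds into a single
; memory-operand vpmovsx instruction on AVX; SSSE3 and SSE2 lack pmovsx and
; expand the operation instead.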

; AVX: load_sext_test1
; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test1
; SSSE3: movq
; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSSE3: psrad $16
; SSSE3: ret

; SSE2: load_sext_test1
; SSE2: movq
; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSE2: psrad $16
; SSE2: ret
define <4 x i32> @load_sext_test1(<4 x i16>* %ptr) {
  %X = load <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i32>
  ret <4 x i32> %Y
}

; AVX: load_sext_test2
; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test2
; SSSE3: movd
; SSSE3: pshufb
; SSSE3: psrad $24
; SSSE3: ret

; SSE2: load_sext_test2
; SSE2: movl
; SSE2: psrad $24
; SSE2: ret
define <4 x i32> @load_sext_test2(<4 x i8>* %ptr) {
  %X = load <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i32>
  ret <4 x i32> %Y
}

; AVX: load_sext_test3
; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test3
; SSSE3: movsbq
; SSSE3: movsbq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test3
; SSE2: movsbq
; SSE2: movsbq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test3(<2 x i8>* %ptr) {
  %X = load <2 x i8>* %ptr
  %Y = sext <2 x i8> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test4
; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test4
; SSSE3: movswq
; SSSE3: movswq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test4
; SSE2: movswq
; SSE2: movswq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test4(<2 x i16>* %ptr) {
  %X = load <2 x i16>* %ptr
  %Y = sext <2 x i16> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test5
; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test5
; SSSE3: movslq
; SSSE3: movslq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test5
; SSE2: movslq
; SSE2: movslq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test5(<2 x i32>* %ptr) {
  %X = load <2 x i32>* %ptr
  %Y = sext <2 x i32> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test6
; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test6
; SSSE3: movq
; SSSE3: punpcklbw
; SSSE3: psraw $8
; SSSE3: ret

; SSE2: load_sext_test6
; SSE2: movq
; SSE2: punpcklbw
; SSE2: psraw $8
; SSE2: ret
define <8 x i16> @load_sext_test6(<8 x i8>* %ptr) {
  %X = load <8 x i8>* %ptr
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16> %Y
}