mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 20:29:48 +00:00
cae9695fbb
This should complete the job started in r231794 and continued in r232045: We want to replace as much custom x86 shuffling via intrinsics as possible because pushing the code down the generic shuffle optimization path allows for better codegen and less complexity in LLVM. AVX2 introduced proper integer variants of the hacked integer insert/extract C intrinsics that were created for this same functionality with AVX1. This should complete the removal of insert/extract128 intrinsics. The Clang precursor patch for this change was checked in at r232109. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232120 91177308-0d34-0410-b5e6-96231b3b80d8
86 lines
3.6 KiB
LLVM
86 lines
3.6 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
|
|
|
|
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
|
|
; CHECK: vpblendw
|
|
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
|
ret <16 x i16> %res
|
|
}
|
|
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
|
|
|
|
|
|
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
|
|
; CHECK: vpblendd
|
|
%res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
|
|
ret <4 x i32> %res
|
|
}
|
|
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
|
|
|
|
|
|
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
|
|
; CHECK: vpblendd
|
|
%res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
|
|
ret <8 x i32> %res
|
|
}
|
|
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
|
|
|
|
|
|
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
|
|
; CHECK: vmpsadbw
|
|
%res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
|
ret <16 x i16> %res
|
|
}
|
|
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
|
|
|
|
|
|
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
|
|
; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
|
|
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
|
ret <4 x i64> %res
|
|
}
|
|
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
|
|
|
|
|
|
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
|
|
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
|
|
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
|
|
ret <4 x i64> %res
|
|
}
|
|
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
|
|
|
|
|
|
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
|
|
; CHECK: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
|
|
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
|
|
ret <4 x i64> %res
|
|
}
|
|
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
|
|
|
|
|
|
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
|
|
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
|
|
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
|
|
ret <4 x i64> %res
|
|
}
|
|
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
|
|
|
|
|
|
define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
|
|
; CHECK-LABEL: test_x86_avx2_vextracti128:
|
|
; CHECK: vextracti128
|
|
|
|
%res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
|
|
ret <2 x i64> %res
|
|
}
|
|
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
|
|
|
|
|
|
define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
|
|
; CHECK-LABEL: test_x86_avx2_vinserti128:
|
|
; CHECK: vinserti128
|
|
|
|
%res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
|
|
ret <4 x i64> %res
|
|
}
|
|
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
|
|
|