mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 05:32:25 +00:00
Add AVX512 masked leadz (leading zero count) intrinsic support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210652 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0e1f75bff8
commit
998d8f50a7
@ -3134,6 +3134,16 @@ let TargetPrefix = "x86" in {
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty],
|
||||
[]>;
|
||||
// Declares the intrinsic int_x86_avx512_mask_lzcnt_d_512 and ties it to the
// GCC builtin __builtin_ia32_vplzcntd_512_mask.
// Result type: v16i32. Operand types, in order: v16i32, v16i32, i16.
// NOTE(review): the property list below is empty ([]), while the test file
// declares @llvm.x86.avx512.mask.lzcnt.d.512 as `nounwind readonly` --
// confirm which memory behaviour is intended.
def int_x86_avx512_mask_lzcnt_d_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vplzcntd_512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_i16_ty],
|
||||
[]>;
|
||||
// Declares the intrinsic int_x86_avx512_mask_lzcnt_q_512 and ties it to the
// GCC builtin __builtin_ia32_vplzcntq_512_mask.
// Result type: v8i64. Operand types, in order: v8i64, v8i64, i8.
// NOTE(review): the property list below is empty ([]), while the test file
// declares @llvm.x86.avx512.mask.lzcnt.q.512 as `nounwind readonly` --
// confirm which memory behaviour is intended.
def int_x86_avx512_mask_lzcnt_q_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vplzcntq_512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty],
|
||||
[]>;
|
||||
}
|
||||
|
||||
// Vector blend
|
||||
|
@ -4354,6 +4354,28 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
|
||||
(VPCONFLICTQrrk VR512:$src1,
|
||||
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
|
||||
|
||||
// Defines the VPLZCNTD and VPLZCNTQ instruction families; everything inside
// this block is guarded by the HasCDI predicate.
// Both families instantiate the avx512_conflict multiclass with opcode 0x44
// and the VR512 register class. They differ only in the remaining arguments:
//   VPLZCNTD: VK16WM, i32mem, "{1to16}", EVEX_CD8<32, CD8VF>
//   VPLZCNTQ: VK8WM,  i64mem, "{1to8}",  EVEX_CD8<64, CD8VF>, plus VEX_W
// NOTE(review): the multiclass body is not visible here; presumably it
// produces the rrk variants (VPLZCNTDrrk / VPLZCNTQrrk) used by the selection
// patterns later in this file -- confirm against avx512_conflict.
let Predicates = [HasCDI] in {
|
||||
defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
|
||||
i512mem, i32mem, "{1to16}">,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
|
||||
defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
|
||||
i512mem, i64mem, "{1to8}">,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
}
|
||||
|
||||
// Selects int_x86_avx512_mask_lzcnt_d_512 to VPLZCNTDrrk.
// The i16 mask arrives in GR16 and is copied into the VK16WM register class,
// typed as v16i1. Operand order is swapped between the two sides: the
// intrinsic's first operand ($src2) becomes the instruction's last operand,
// and the intrinsic's second operand ($src1) becomes the instruction's first.
// NOTE(review): this pattern is placed after the closing brace of the
// `let Predicates = [HasCDI]` block, so it is not covered by that predicate --
// confirm this is intended.
def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
|
||||
GR16:$mask),
|
||||
(VPLZCNTDrrk VR512:$src1,
|
||||
(v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
|
||||
|
||||
// Selects int_x86_avx512_mask_lzcnt_q_512 to VPLZCNTQrrk.
// The i8 mask arrives in GR8 and is copied into the VK8WM register class,
// typed as v8i1. Operand order is swapped between the two sides: the
// intrinsic's first operand ($src2) becomes the instruction's last operand,
// and the intrinsic's second operand ($src1) becomes the instruction's first.
// NOTE(review): this pattern is placed after the closing brace of the
// `let Predicates = [HasCDI]` block, so it is not covered by that predicate --
// confirm this is intended.
def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
|
||||
GR8:$mask),
|
||||
(VPLZCNTQrrk VR512:$src1,
|
||||
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
|
||||
|
||||
// Stores of an i1 constant are selected to an 8-bit immediate store (MOV8mi).
// Both spellings of "true" (i1 -1 and i1 1) store the byte value 1;
// i1 0 stores the byte value 0.
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
|
||||
|
@ -311,7 +311,6 @@ define <8 x i64> @test_conflict_q(<8 x i64> %a) {
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
|
||||
|
||||
|
||||
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
|
||||
; CHECK: vpconflictd
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
|
||||
@ -324,6 +323,39 @@ define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
; Calls the lzcnt.d.512 intrinsic on %a with a zeroinitializer second operand
; and the constant mask i16 -1 (all sixteen bits set).
; The FileCheck lines below expect a 16-bit move of -1 into %ax, a vpxor,
; and the vplzcntd instruction itself.
define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
|
||||
; CHECK: movw $-1, %ax
|
||||
; CHECK: vpxor
|
||||
; CHECK: vplzcntd
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
; Declaration of the intrinsic used by the lzcnt.d tests in this file:
; two <16 x i32> operands and an i16 mask, returning <16 x i32>.
declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
|
||||
|
||||
; Calls the lzcnt.q.512 intrinsic on %a with a zeroinitializer second operand
; and the constant mask i8 -1 (all eight bits set).
; The FileCheck lines below expect an 8-bit move of -1 into %al, a vpxor,
; and the vplzcntq instruction itself.
define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
|
||||
; CHECK: movb $-1, %al
|
||||
; CHECK: vpxor
|
||||
; CHECK: vplzcntq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
; Declaration of the intrinsic used by the lzcnt.q tests in this file:
; two <8 x i64> operands and an i8 mask, returning <8 x i64>.
declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
|
||||
|
||||
|
||||
; Calls the lzcnt.d.512 intrinsic with a run-time mask (%mask) and a
; zeroinitializer second operand. Only the presence of vplzcntd is checked.
define <16 x i32> @test_maskz_lzcnt_d(<16 x i32> %a, i16 %mask) {
|
||||
; CHECK: vplzcntd
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
; Calls the lzcnt.q.512 intrinsic with a run-time mask (%mask) and a
; caller-supplied second vector operand (%b). Only the presence of vplzcntq
; is checked.
define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK: vplzcntq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK: vblendmps
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
|
||||
|
Loading…
Reference in New Issue
Block a user