[AVX512] Intrinsics for vextract*x4

This adds the Pat<>s for the intrinsics.  These are necessary because we
don't lower these intrinsics to SDNodes but match them directly.  See the
rationale in the previous commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219362 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Adam Nemet 2014-10-08 23:25:37 +00:00
parent e868005a27
commit fbd0e464dd
3 changed files with 76 additions and 0 deletions

View File

@ -1791,6 +1791,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">, def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
// Masked extract of a 4 x f32 subvector from a 16 x f32 vector, exposed as
// the GCC builtin __builtin_ia32_extractf32x4_mask.
// Result: v4f32.  Operands, in order: v16f32, i8, v4f32, i8.
// NOTE(review): the operand roles appear to be (source, subvector index,
// pass-through value, write mask) -- confirm against the selection patterns.
// IntrNoMem: the intrinsic is declared as not accessing memory.
def int_x86_avx512_mask_vextractf32x4_512 :
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
// Masked extract of a 4 x i32 subvector from a 16 x i32 vector, exposed as
// the GCC builtin __builtin_ia32_extracti32x4_mask.
// Result: v4i32.  Operands, in order: v16i32, i8, v4i32, i8.
// NOTE(review): the operand roles appear to be (source, subvector index,
// pass-through value, write mask) -- confirm against the selection patterns.
// IntrNoMem: the intrinsic is declared as not accessing memory.
def int_x86_avx512_mask_vextracti32x4_512 :
GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
// Masked extract of a 4 x f64 subvector from an 8 x f64 vector, exposed as
// the GCC builtin __builtin_ia32_extractf64x4_mask.
// Result: v4f64.  Operands, in order: v8f64, i8, v4f64, i8.
// NOTE(review): the operand roles appear to be (source, subvector index,
// pass-through value, write mask) -- confirm against the selection patterns.
// IntrNoMem: the intrinsic is declared as not accessing memory.
def int_x86_avx512_mask_vextractf64x4_512 :
GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
// Masked extract of a 4 x i64 subvector from an 8 x i64 vector, exposed as
// the GCC builtin __builtin_ia32_extracti64x4_mask.
// Result: v4i64.  Operands, in order: v8i64, i8, v4i64, i8.
// NOTE(review): the operand roles appear to be (source, subvector index,
// pass-through value, write mask) -- confirm against the selection patterns.
// IntrNoMem: the intrinsic is declared as not accessing memory.
def int_x86_avx512_mask_vextracti64x4_512 :
GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
} }
// Conditional load ops // Conditional load ops

View File

@ -449,6 +449,29 @@ multiclass vextract_for_size<int Opcode,
def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))), def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
(AltTo.VT (AltTo.VT
(EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>; (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
// Intrinsic call with masking.
// Matches int_x86_avx512_mask_vextract<To.EltTypeName>x4_512 with an
// arbitrary pass-through register ($src0) and a GR8 mask, and selects the
// <NAME><To.EltSize>x4rrk instruction.  The intrinsic name and the
// instruction name are both built by string concatenation and resolved with
// !cast.  $src0 becomes the first instruction operand, followed by the mask,
// the 512-bit source and the immediate index.
// The GR8 mask is moved into the VK4WM register class and typed as v4i1.
// NOTE(review): presumably only the low four bits of the i8 mask are
// significant after this copy -- confirm.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x4_512")
VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
(v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
VR512:$src1, imm:$idx)>;
// Intrinsic call with zero-masking.
// Matches the same intrinsic when the pass-through operand is the all-zeros
// vector (To.ImmAllZerosV) and the mask is a GR8 register, and selects the
// <NAME><To.EltSize>x4rrkz instruction.  No pass-through operand is handed
// to the instruction; its operands are the mask, the 512-bit source and the
// immediate index.
// The GR8 mask is moved into the VK4WM register class and typed as v4i1.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x4_512")
VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
(v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
VR512:$src1, imm:$idx)>;
// Intrinsic call without masking.
// Matches the same intrinsic when the mask operand is the constant i8 -1
// and the pass-through operand is the all-zeros vector (To.ImmAllZerosV),
// and selects the plain <NAME><To.EltSize>x4rr instruction, which takes only
// the 512-bit source and the immediate index.
// NOTE(review): this pattern requires the pass-through operand to be
// all-zeros; a call with mask -1 and any other pass-through value is not
// matched here -- confirm that is intended.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x4_512")
VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
(!cast<Instruction>(NAME # To.EltSize # "x4rr")
VR512:$src1, imm:$idx)>;
} }
multiclass vextract_for_type<ValueType EltVT32, int Opcode32, multiclass vextract_for_type<ValueType EltVT32, int Opcode32,

View File

@ -949,3 +949,39 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
} }
declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Masked form of the f32x4 extract intrinsic: %b is supplied as the third
; (pass-through) argument and %mask is a runtime i8 value.  The expected
; output is a vextractf32x4 with immediate 2 writing %xmm0 under the {%k1}
; write mask.
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
; Masked form of the i64x4 extract intrinsic: %b is supplied as the third
; (pass-through) argument and %mask is a runtime i8 value.  The expected
; output is a vextracti64x4 with immediate 2 writing %ymm0 under the {%k1}
; write mask.
; NOTE(review): an <8 x i64> source holds only two <4 x i64> groups, yet the
; index passed is 2 -- confirm this out-of-range-looking index is intended.
define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextracti64x4:
; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
%res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
; Zero-masked form of the i32x4 extract intrinsic: the third (pass-through)
; argument is zeroinitializer and %mask is a runtime i8 value.  The expected
; output is a vextracti32x4 with immediate 2 carrying both the {%k1} write
; mask and the {z} marker.
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
%res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
; Unmasked use of the f64x4 extract intrinsic: the mask argument is the
; constant i8 -1 and the third (pass-through) argument is zeroinitializer.
; The expected output is a vextractf64x4 with immediate 2 and no mask
; annotation.
; NOTE(review): the trailing "##" in the expected line presumably anchors on
; the assembler comment marker so that a mask suffix would fail the match --
; confirm for the triple this test runs with.
; NOTE(review): an <8 x double> source holds only two <4 x double> groups,
; yet the index passed is 2 -- confirm this is intended.
define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
%res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)