AVX-512: changes in intrinsics

1) Changed gather and scatter intrinsics. Now they are aligned with GCC built-ins. There is no more non-masked form. Masked intrinsic receives -1 if all lanes are executed.
2) I changed the function that works with intrinsics inside X86ISelLowering.cpp. I put all intrinsics in one table. I did it for INTRINSICS_W_CHAIN and plan to put all intrinsics from WO_CHAIN set to the same table in order to avoid the long-long "switch". (I wanted to use static map initialization that allowed by C++11 but I wasn't able to compile it on VS2012).
3) I added gather/scatter prefetch intrinsics.
4) I fixed MRMm encoding for masked instructions.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208522 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky
2014-05-12 07:18:51 +00:00
parent f37d8e6b1a
commit 1cec507d6d
6 changed files with 373 additions and 439 deletions

View File

@@ -3029,141 +3029,104 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Gather and Scatter ops
let TargetPrefix = "x86" in {
def int_x86_avx512_gather_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gathersiv16sf">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty,
llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8df">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16sf">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherdpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gatherdps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherqpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherqps512">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gatherdpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_dpi_512 : GCCBuiltin<"__builtin_ia32_gatherdpi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpq_512 : GCCBuiltin<"__builtin_ia32_gatherqpq512">,
def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gathersiv8di">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpi_512 : GCCBuiltin<"__builtin_ia32_gatherqpi512">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
def int_x86_avx512_gather_dpi_512 : GCCBuiltin<"__builtin_ia32_gathersiv16si">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpq_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8di">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather_qpi_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16si">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
// scatter
def int_x86_avx512_scatter_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpd512">,
def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdps512">,
def int_x86_avx512_scatter_dps_512 : GCCBuiltin<"__builtin_ia32_scattersiv16sf">,
Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpd512">,
def int_x86_avx512_scatter_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8df">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqps512">,
def int_x86_avx512_scatter_qps_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16sf">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterdpd512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f64_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dps_512 : GCCBuiltin<"__builtin_ia32_scatterdps512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_v16f32_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterqpd512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8f64_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qps_512 : GCCBuiltin<"__builtin_ia32_scatterqps512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8f32_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpq512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty,
llvm_v8i64_ty, llvm_i32_ty],
def int_x86_avx512_scatter_dpq_512 : GCCBuiltin<"__builtin_ia32_scattersiv8di">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpi512">,
def int_x86_avx512_scatter_dpi_512 : GCCBuiltin<"__builtin_ia32_scattersiv16si">,
Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpq512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
def int_x86_avx512_scatter_qpq_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8di">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpi512">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8i32_ty, llvm_i32_ty],
def int_x86_avx512_scatter_qpi_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16si">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8i32_ty,
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatter_dpq_512 : GCCBuiltin<"__builtin_ia32_scatterdpq512">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
[]>;
def int_x86_avx512_scatter_dpi_512 : GCCBuiltin<"__builtin_ia32_scatterdpi512">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
[]>;
def int_x86_avx512_scatter_qpq_512 : GCCBuiltin<"__builtin_ia32_scatterqpq512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i32_ty],
[]>;
def int_x86_avx512_scatter_qpi_512 : GCCBuiltin<"__builtin_ia32_scatterqpi512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i32_ty,
llvm_i32_ty],
[]>;
// gather prefetch
def int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx512_gatherpf_dps_512 : GCCBuiltin<"__builtin_ia32_gatherpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx512_gatherpf_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx512_gatherpf_qps_512 : GCCBuiltin<"__builtin_ia32_gatherpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
// scatter prefetch
def int_x86_avx512_scatterpf_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx512_scatterpf_dps_512 : GCCBuiltin<"__builtin_ia32_scatterpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx512_scatterpf_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx512_scatterpf_qps_512 : GCCBuiltin<"__builtin_ia32_scatterpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
}
// AVX-512 conflict detection