mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 21:32:10 +00:00
AVX-512: PMIN/PMAX intrinsics and patterns
Patch by Cameron McInally <cameron.mcinally@nyu.edu>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193497 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cba7d7d579
commit
62d66cbec5
@ -2719,6 +2719,32 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
|
def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
|
||||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
|
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
|
||||||
llvm_v8f64_ty], [IntrNoMem]>;
|
llvm_v8f64_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
|
// llvm.x86.avx512.pmaxu.d: takes two v16i32 operands and yields a v16i32;
// exposed to C as __builtin_ia32_pmaxud512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::UMAX.
def int_x86_avx512_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud512">,
|
||||||
|
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||||
|
llvm_v16i32_ty], [IntrNoMem]>;
|
||||||
|
// llvm.x86.avx512.pmaxu.q: takes two v8i64 operands and yields a v8i64;
// exposed to C as __builtin_ia32_pmaxuq512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::UMAX.
def int_x86_avx512_pmaxu_q : GCCBuiltin<"__builtin_ia32_pmaxuq512">,
|
||||||
|
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||||
|
llvm_v8i64_ty], [IntrNoMem]>;
|
||||||
|
// llvm.x86.avx512.pmaxs.d: takes two v16i32 operands and yields a v16i32;
// exposed to C as __builtin_ia32_pmaxsd512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::SMAX.
def int_x86_avx512_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd512">,
|
||||||
|
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||||
|
llvm_v16i32_ty], [IntrNoMem]>;
|
||||||
|
// llvm.x86.avx512.pmaxs.q: takes two v8i64 operands and yields a v8i64;
// exposed to C as __builtin_ia32_pmaxsq512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::SMAX.
def int_x86_avx512_pmaxs_q : GCCBuiltin<"__builtin_ia32_pmaxsq512">,
|
||||||
|
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||||
|
llvm_v8i64_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
|
// llvm.x86.avx512.pminu.d: takes two v16i32 operands and yields a v16i32;
// exposed to C as __builtin_ia32_pminud512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::UMIN.
def int_x86_avx512_pminu_d : GCCBuiltin<"__builtin_ia32_pminud512">,
|
||||||
|
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||||
|
llvm_v16i32_ty], [IntrNoMem]>;
|
||||||
|
// llvm.x86.avx512.pminu.q: takes two v8i64 operands and yields a v8i64;
// exposed to C as __builtin_ia32_pminuq512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::UMIN.
def int_x86_avx512_pminu_q : GCCBuiltin<"__builtin_ia32_pminuq512">,
|
||||||
|
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||||
|
llvm_v8i64_ty], [IntrNoMem]>;
|
||||||
|
// llvm.x86.avx512.pmins.d: takes two v16i32 operands and yields a v16i32;
// exposed to C as __builtin_ia32_pminsd512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::SMIN.
def int_x86_avx512_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd512">,
|
||||||
|
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||||
|
llvm_v16i32_ty], [IntrNoMem]>;
|
||||||
|
// llvm.x86.avx512.pmins.q: takes two v8i64 operands and yields a v8i64;
// exposed to C as __builtin_ia32_pminsq512 and tagged IntrNoMem. The x86
// lowering switch maps this intrinsic to X86ISD::SMIN.
def int_x86_avx512_pmins_q : GCCBuiltin<"__builtin_ia32_pminsq512">,
|
||||||
|
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||||
|
llvm_v8i64_ty], [IntrNoMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
|
@ -11173,24 +11173,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||||||
case Intrinsic::x86_avx2_pmaxu_b:
|
case Intrinsic::x86_avx2_pmaxu_b:
|
||||||
case Intrinsic::x86_avx2_pmaxu_w:
|
case Intrinsic::x86_avx2_pmaxu_w:
|
||||||
case Intrinsic::x86_avx2_pmaxu_d:
|
case Intrinsic::x86_avx2_pmaxu_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxu_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxu_q:
|
||||||
case Intrinsic::x86_sse2_pminu_b:
|
case Intrinsic::x86_sse2_pminu_b:
|
||||||
case Intrinsic::x86_sse41_pminuw:
|
case Intrinsic::x86_sse41_pminuw:
|
||||||
case Intrinsic::x86_sse41_pminud:
|
case Intrinsic::x86_sse41_pminud:
|
||||||
case Intrinsic::x86_avx2_pminu_b:
|
case Intrinsic::x86_avx2_pminu_b:
|
||||||
case Intrinsic::x86_avx2_pminu_w:
|
case Intrinsic::x86_avx2_pminu_w:
|
||||||
case Intrinsic::x86_avx2_pminu_d:
|
case Intrinsic::x86_avx2_pminu_d:
|
||||||
|
case Intrinsic::x86_avx512_pminu_d:
|
||||||
|
case Intrinsic::x86_avx512_pminu_q:
|
||||||
case Intrinsic::x86_sse41_pmaxsb:
|
case Intrinsic::x86_sse41_pmaxsb:
|
||||||
case Intrinsic::x86_sse2_pmaxs_w:
|
case Intrinsic::x86_sse2_pmaxs_w:
|
||||||
case Intrinsic::x86_sse41_pmaxsd:
|
case Intrinsic::x86_sse41_pmaxsd:
|
||||||
case Intrinsic::x86_avx2_pmaxs_b:
|
case Intrinsic::x86_avx2_pmaxs_b:
|
||||||
case Intrinsic::x86_avx2_pmaxs_w:
|
case Intrinsic::x86_avx2_pmaxs_w:
|
||||||
case Intrinsic::x86_avx2_pmaxs_d:
|
case Intrinsic::x86_avx2_pmaxs_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxs_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxs_q:
|
||||||
case Intrinsic::x86_sse41_pminsb:
|
case Intrinsic::x86_sse41_pminsb:
|
||||||
case Intrinsic::x86_sse2_pmins_w:
|
case Intrinsic::x86_sse2_pmins_w:
|
||||||
case Intrinsic::x86_sse41_pminsd:
|
case Intrinsic::x86_sse41_pminsd:
|
||||||
case Intrinsic::x86_avx2_pmins_b:
|
case Intrinsic::x86_avx2_pmins_b:
|
||||||
case Intrinsic::x86_avx2_pmins_w:
|
case Intrinsic::x86_avx2_pmins_w:
|
||||||
case Intrinsic::x86_avx2_pmins_d: {
|
case Intrinsic::x86_avx2_pmins_d:
|
||||||
|
case Intrinsic::x86_avx512_pmins_d:
|
||||||
|
case Intrinsic::x86_avx512_pmins_q: {
|
||||||
unsigned Opcode;
|
unsigned Opcode;
|
||||||
switch (IntNo) {
|
switch (IntNo) {
|
||||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||||
@ -11200,6 +11208,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||||||
case Intrinsic::x86_avx2_pmaxu_b:
|
case Intrinsic::x86_avx2_pmaxu_b:
|
||||||
case Intrinsic::x86_avx2_pmaxu_w:
|
case Intrinsic::x86_avx2_pmaxu_w:
|
||||||
case Intrinsic::x86_avx2_pmaxu_d:
|
case Intrinsic::x86_avx2_pmaxu_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxu_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxu_q:
|
||||||
Opcode = X86ISD::UMAX;
|
Opcode = X86ISD::UMAX;
|
||||||
break;
|
break;
|
||||||
case Intrinsic::x86_sse2_pminu_b:
|
case Intrinsic::x86_sse2_pminu_b:
|
||||||
@ -11208,6 +11218,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||||||
case Intrinsic::x86_avx2_pminu_b:
|
case Intrinsic::x86_avx2_pminu_b:
|
||||||
case Intrinsic::x86_avx2_pminu_w:
|
case Intrinsic::x86_avx2_pminu_w:
|
||||||
case Intrinsic::x86_avx2_pminu_d:
|
case Intrinsic::x86_avx2_pminu_d:
|
||||||
|
case Intrinsic::x86_avx512_pminu_d:
|
||||||
|
case Intrinsic::x86_avx512_pminu_q:
|
||||||
Opcode = X86ISD::UMIN;
|
Opcode = X86ISD::UMIN;
|
||||||
break;
|
break;
|
||||||
case Intrinsic::x86_sse41_pmaxsb:
|
case Intrinsic::x86_sse41_pmaxsb:
|
||||||
@ -11216,6 +11228,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||||||
case Intrinsic::x86_avx2_pmaxs_b:
|
case Intrinsic::x86_avx2_pmaxs_b:
|
||||||
case Intrinsic::x86_avx2_pmaxs_w:
|
case Intrinsic::x86_avx2_pmaxs_w:
|
||||||
case Intrinsic::x86_avx2_pmaxs_d:
|
case Intrinsic::x86_avx2_pmaxs_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxs_d:
|
||||||
|
case Intrinsic::x86_avx512_pmaxs_q:
|
||||||
Opcode = X86ISD::SMAX;
|
Opcode = X86ISD::SMAX;
|
||||||
break;
|
break;
|
||||||
case Intrinsic::x86_sse41_pminsb:
|
case Intrinsic::x86_sse41_pminsb:
|
||||||
@ -11224,6 +11238,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||||||
case Intrinsic::x86_avx2_pmins_b:
|
case Intrinsic::x86_avx2_pmins_b:
|
||||||
case Intrinsic::x86_avx2_pmins_w:
|
case Intrinsic::x86_avx2_pmins_w:
|
||||||
case Intrinsic::x86_avx2_pmins_d:
|
case Intrinsic::x86_avx2_pmins_d:
|
||||||
|
case Intrinsic::x86_avx512_pmins_d:
|
||||||
|
case Intrinsic::x86_avx512_pmins_q:
|
||||||
Opcode = X86ISD::SMIN;
|
Opcode = X86ISD::SMIN;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1599,6 +1599,34 @@ defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
|
|||||||
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
|
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
|
||||||
(VPMULUDQZrr VR512:$src1, VR512:$src2)>;
|
(VPMULUDQZrr VR512:$src1, VR512:$src2)>;
|
||||||
|
|
||||||
|
// VPMAXUDZ: instantiates avx512_binop_rm for mnemonic "vpmaxud" (opcode byte
// 0x3F), matching the X86umax node on v16i32 in VR512. Encoding tags are T8,
// EVEX_V512 and EVEX_CD8<32, CD8VF>.
// NOTE(review): "{1to16}" with loadi32/i32mem is presumably the
// embedded-broadcast memory form, and the trailing 1 presumably the
// commutable flag — both are defined by avx512_binop_rm, which is not
// visible here; confirm.
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
|
||||||
|
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
|
||||||
|
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
|
// VPMAXUQZ: instantiates avx512_binop_rm for mnemonic "vpmaxuq" (opcode byte
// 0x3F), matching the X86umax node on v8i64 in VR512. Encoding tags are T8,
// EVEX_V512, VEX_W and EVEX_CD8<64, CD8VF>.
// NOTE(review): the last template argument is 0 here but 1 on the 32-bit
// sibling VPMAXUDZ. If it is the commutable flag (defined by avx512_binop_rm,
// not visible here), max is commutative and 1 may be intended — confirm.
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
|
||||||
|
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
|
||||||
|
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
|
// VPMAXSDZ: instantiates avx512_binop_rm for mnemonic "vpmaxsd" (opcode byte
// 0x3D), matching the X86smax node on v16i32 in VR512. Encoding tags are T8,
// EVEX_V512 and EVEX_CD8<32, CD8VF>.
// The T8 opcode-map tag is required, as on every sibling PMIN/PMAX defm:
// VPMAXSD is documented as EVEX.512.66.0F38.W0 3D /r, so omitting T8 would
// place opcode 0x3D in the wrong opcode map and mis-encode the instruction.
// NOTE(review): the trailing 1 is presumably the commutable flag defined by
// avx512_binop_rm, which is not visible here; confirm.
defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
|
||||||
|
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
|
||||||
|
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
|
// VPMAXSQZ: instantiates avx512_binop_rm for mnemonic "vpmaxsq" (opcode byte
// 0x3D), matching the X86smax node on v8i64 in VR512. Encoding tags are T8,
// EVEX_V512, VEX_W and EVEX_CD8<64, CD8VF>.
// NOTE(review): the last template argument is 0 here but 1 on the 32-bit
// sibling VPMAXSDZ. If it is the commutable flag (defined by avx512_binop_rm,
// not visible here), max is commutative and 1 may be intended — confirm.
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
|
||||||
|
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
|
||||||
|
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
|
// VPMINUDZ: instantiates avx512_binop_rm for mnemonic "vpminud" (opcode byte
// 0x3B), matching the X86umin node on v16i32 in VR512. Encoding tags are T8,
// EVEX_V512 and EVEX_CD8<32, CD8VF>.
// NOTE(review): "{1to16}" with loadi32/i32mem is presumably the
// embedded-broadcast memory form, and the trailing 1 presumably the
// commutable flag — both are defined by avx512_binop_rm, which is not
// visible here; confirm.
defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
|
||||||
|
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
|
||||||
|
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
|
// VPMINUQZ: instantiates avx512_binop_rm for mnemonic "vpminuq" (opcode byte
// 0x3B), matching the X86umin node on v8i64 in VR512. Encoding tags are T8,
// EVEX_V512, VEX_W and EVEX_CD8<64, CD8VF>.
// NOTE(review): the last template argument is 0 here but 1 on the 32-bit
// sibling VPMINUDZ. If it is the commutable flag (defined by avx512_binop_rm,
// not visible here), min is commutative and 1 may be intended — confirm.
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
|
||||||
|
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
|
||||||
|
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
|
// VPMINSDZ: instantiates avx512_binop_rm for mnemonic "vpminsd" (opcode byte
// 0x39), matching the X86smin node on v16i32 in VR512. Encoding tags are T8,
// EVEX_V512 and EVEX_CD8<32, CD8VF>.
// NOTE(review): "{1to16}" with loadi32/i32mem is presumably the
// embedded-broadcast memory form, and the trailing 1 presumably the
// commutable flag — both are defined by avx512_binop_rm, which is not
// visible here; confirm.
defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
|
||||||
|
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
|
||||||
|
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
|
// VPMINSQZ: instantiates avx512_binop_rm for mnemonic "vpminsq" (opcode byte
// 0x39), matching the X86smin node on v8i64 in VR512. Encoding tags are T8,
// EVEX_V512, VEX_W and EVEX_CD8<64, CD8VF>.
// NOTE(review): the last template argument is 0 here but 1 on the 32-bit
// sibling VPMINSDZ. If it is the commutable flag (defined by avx512_binop_rm,
// not visible here), min is commutative and 1 may be intended — confirm.
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
|
||||||
|
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
|
||||||
|
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// AVX-512 - Unpack Instructions
|
// AVX-512 - Unpack Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -233,3 +233,59 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
|
|||||||
ret <8 x double> %res
|
ret <8 x double> %res
|
||||||
}
|
}
|
||||||
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
|
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <16 x i32> arguments to llvm.x86.avx512.pmaxu.d and returns the
; result; the check directive inside the body expects the text "vpmaxud" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) {
|
||||||
|
; CHECK: vpmaxud
|
||||||
|
%res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
|
||||||
|
ret <16 x i32> %res
|
||||||
|
}
|
||||||
|
declare <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32>, <16 x i32>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <8 x i64> arguments to llvm.x86.avx512.pmaxu.q and returns the
; result; the check directive inside the body expects the text "vpmaxuq" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <8 x i64> @test_x86_pmaxu_q(<8 x i64> %a0, <8 x i64> %a1) {
|
||||||
|
; CHECK: vpmaxuq
|
||||||
|
%res = call <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
|
||||||
|
ret <8 x i64> %res
|
||||||
|
}
|
||||||
|
declare <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64>, <8 x i64>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <16 x i32> arguments to llvm.x86.avx512.pmaxs.d and returns the
; result; the check directive inside the body expects the text "vpmaxsd" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <16 x i32> @test_x86_pmaxs_d(<16 x i32> %a0, <16 x i32> %a1) {
|
||||||
|
; CHECK: vpmaxsd
|
||||||
|
%res = call <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
|
||||||
|
ret <16 x i32> %res
|
||||||
|
}
|
||||||
|
declare <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32>, <16 x i32>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <8 x i64> arguments to llvm.x86.avx512.pmaxs.q and returns the
; result; the check directive inside the body expects the text "vpmaxsq" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <8 x i64> @test_x86_pmaxs_q(<8 x i64> %a0, <8 x i64> %a1) {
|
||||||
|
; CHECK: vpmaxsq
|
||||||
|
%res = call <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
|
||||||
|
ret <8 x i64> %res
|
||||||
|
}
|
||||||
|
declare <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64>, <8 x i64>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <16 x i32> arguments to llvm.x86.avx512.pminu.d and returns the
; result; the check directive inside the body expects the text "vpminud" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <16 x i32> @test_x86_pminu_d(<16 x i32> %a0, <16 x i32> %a1) {
|
||||||
|
; CHECK: vpminud
|
||||||
|
%res = call <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
|
||||||
|
ret <16 x i32> %res
|
||||||
|
}
|
||||||
|
declare <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32>, <16 x i32>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <8 x i64> arguments to llvm.x86.avx512.pminu.q and returns the
; result; the check directive inside the body expects the text "vpminuq" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <8 x i64> @test_x86_pminu_q(<8 x i64> %a0, <8 x i64> %a1) {
|
||||||
|
; CHECK: vpminuq
|
||||||
|
%res = call <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
|
||||||
|
ret <8 x i64> %res
|
||||||
|
}
|
||||||
|
declare <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64>, <8 x i64>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <16 x i32> arguments to llvm.x86.avx512.pmins.d and returns the
; result; the check directive inside the body expects the text "vpminsd" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <16 x i32> @test_x86_pmins_d(<16 x i32> %a0, <16 x i32> %a1) {
|
||||||
|
; CHECK: vpminsd
|
||||||
|
%res = call <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
|
||||||
|
ret <16 x i32> %res
|
||||||
|
}
|
||||||
|
declare <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32>, <16 x i32>) nounwind readonly
|
||||||
|
|
||||||
|
; Passes both <8 x i64> arguments to llvm.x86.avx512.pmins.q and returns the
; result; the check directive inside the body expects the text "vpminsq" in
; the tool output (the RUN line is not visible in this view).
; NOTE(review): the declaration below says "readonly" while the TableGen
; definition of this intrinsic uses IntrNoMem - "readnone" may be the closer
; match; confirm.
define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
|
||||||
|
; CHECK: vpminsq
|
||||||
|
%res = call <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
|
||||||
|
ret <8 x i64> %res
|
||||||
|
}
|
||||||
|
declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly
|
||||||
|
Loading…
Reference in New Issue
Block a user