AVX-512: PMIN/PMAX intrinsics and patterns

Patch by Cameron McInally <cameron.mcinally@nyu.edu>


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193497 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2013-10-27 08:18:37 +00:00
parent cba7d7d579
commit 62d66cbec5
4 changed files with 127 additions and 1 deletions

View File

@ -2719,6 +2719,32 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">, def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
llvm_v8f64_ty], [IntrNoMem]>; llvm_v8f64_ty], [IntrNoMem]>;
// 512-bit packed unsigned integer maximum. Each intrinsic takes two vector
// operands of the same type as its result and is marked IntrNoMem. The x86
// intrinsic lowering added alongside these maps both to X86ISD::UMAX.
// 16 x i32 form, exposed to front ends as __builtin_ia32_pmaxud512.
def int_x86_avx512_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
// 8 x i64 form, exposed to front ends as __builtin_ia32_pmaxuq512.
def int_x86_avx512_pmaxu_q : GCCBuiltin<"__builtin_ia32_pmaxuq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
// 512-bit packed signed integer maximum. Each intrinsic takes two vector
// operands of the same type as its result and is marked IntrNoMem. The x86
// intrinsic lowering added alongside these maps both to X86ISD::SMAX.
// 16 x i32 form, exposed to front ends as __builtin_ia32_pmaxsd512.
def int_x86_avx512_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
// 8 x i64 form, exposed to front ends as __builtin_ia32_pmaxsq512.
def int_x86_avx512_pmaxs_q : GCCBuiltin<"__builtin_ia32_pmaxsq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
// 512-bit packed unsigned integer minimum. Each intrinsic takes two vector
// operands of the same type as its result and is marked IntrNoMem. The x86
// intrinsic lowering added alongside these maps both to X86ISD::UMIN.
// 16 x i32 form, exposed to front ends as __builtin_ia32_pminud512.
def int_x86_avx512_pminu_d : GCCBuiltin<"__builtin_ia32_pminud512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
// 8 x i64 form, exposed to front ends as __builtin_ia32_pminuq512.
def int_x86_avx512_pminu_q : GCCBuiltin<"__builtin_ia32_pminuq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
// 512-bit packed signed integer minimum. Each intrinsic takes two vector
// operands of the same type as its result and is marked IntrNoMem. The x86
// intrinsic lowering added alongside these maps both to X86ISD::SMIN.
// 16 x i32 form, exposed to front ends as __builtin_ia32_pminsd512.
def int_x86_avx512_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
// 8 x i64 form, exposed to front ends as __builtin_ia32_pminsq512.
def int_x86_avx512_pmins_q : GCCBuiltin<"__builtin_ia32_pminsq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
} }
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".

View File

@ -11173,24 +11173,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b: case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w: case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d: case Intrinsic::x86_avx2_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b: case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw: case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud: case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b: case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w: case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d: case Intrinsic::x86_avx2_pminu_d:
case Intrinsic::x86_avx512_pminu_d:
case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb: case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w: case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd: case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b: case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w: case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d: case Intrinsic::x86_avx2_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb: case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w: case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd: case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b: case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w: case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d: { case Intrinsic::x86_avx2_pmins_d:
case Intrinsic::x86_avx512_pmins_d:
case Intrinsic::x86_avx512_pmins_q: {
unsigned Opcode; unsigned Opcode;
switch (IntNo) { switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@ -11200,6 +11208,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b: case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w: case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d: case Intrinsic::x86_avx2_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_d:
case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX; Opcode = X86ISD::UMAX;
break; break;
case Intrinsic::x86_sse2_pminu_b: case Intrinsic::x86_sse2_pminu_b:
@ -11208,6 +11218,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pminu_b: case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w: case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d: case Intrinsic::x86_avx2_pminu_d:
case Intrinsic::x86_avx512_pminu_d:
case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN; Opcode = X86ISD::UMIN;
break; break;
case Intrinsic::x86_sse41_pmaxsb: case Intrinsic::x86_sse41_pmaxsb:
@ -11216,6 +11228,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxs_b: case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w: case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d: case Intrinsic::x86_avx2_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_d:
case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX; Opcode = X86ISD::SMAX;
break; break;
case Intrinsic::x86_sse41_pminsb: case Intrinsic::x86_sse41_pminsb:
@ -11224,6 +11238,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmins_b: case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w: case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d: case Intrinsic::x86_avx2_pmins_d:
case Intrinsic::x86_avx512_pmins_d:
case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN; Opcode = X86ISD::SMIN;
break; break;
} }

View File

@ -1599,6 +1599,34 @@ defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
(VPMULUDQZrr VR512:$src1, VR512:$src2)>; (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
// 512-bit packed unsigned maximum, selected from the X86umax node.
// Opcode byte 0x3F with the T8 opcode-map modifier.
// Dword form: v16i32 in VR512, 32-bit broadcast memory operand "{1to16}".
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
// Qword form: v8i64 in VR512, 64-bit broadcast memory operand "{1to8}"; adds VEX_W.
// NOTE(review): the last template argument is 1 for the dword form and 0 here.
// If it is the commutable flag of avx512_binop_rm (not visible in this view),
// max is commutative for qwords too - confirm whether 0 is intentional.
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 512-bit packed signed dword maximum, selected from the X86smax node.
// Opcode byte 0x3D lives in the 0F38 opcode map, so the T8 modifier is
// required (as on every sibling PMIN/PMAX definition); without it the
// instruction would be emitted in the wrong opcode map.
defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
// 512-bit packed signed qword maximum, selected from the X86smax node.
// Opcode byte 0x3D with the T8 opcode-map modifier and VEX_W; v8i64 in VR512,
// 64-bit broadcast memory operand "{1to8}".
// NOTE(review): the last template argument is 0 here but 1 on the dword
// min/max forms. If it is the commutable flag of avx512_binop_rm (not visible
// in this view), confirm whether 0 is intentional.
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 512-bit packed unsigned minimum, selected from the X86umin node.
// Opcode byte 0x3B with the T8 opcode-map modifier.
// Dword form: v16i32 in VR512, 32-bit broadcast memory operand "{1to16}".
defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
// Qword form: v8i64 in VR512, 64-bit broadcast memory operand "{1to8}"; adds VEX_W.
// NOTE(review): the last template argument is 1 for the dword form and 0 here.
// If it is the commutable flag of avx512_binop_rm (not visible in this view),
// min is commutative for qwords too - confirm whether 0 is intentional.
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 512-bit packed signed minimum, selected from the X86smin node.
// Opcode byte 0x39 with the T8 opcode-map modifier.
// Dword form: v16i32 in VR512, 32-bit broadcast memory operand "{1to16}".
defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
// Qword form: v8i64 in VR512, 64-bit broadcast memory operand "{1to8}"; adds VEX_W.
// NOTE(review): the last template argument is 1 for the dword form and 0 here.
// If it is the commutable flag of avx512_binop_rm (not visible in this view),
// min is commutative for qwords too - confirm whether 0 is intentional.
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions // AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//

View File

@ -233,3 +233,59 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
ret <8 x double> %res ret <8 x double> %res
} }
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
; Calls llvm.x86.avx512.pmaxu.d on two <16 x i32> arguments and returns the
; result. The check directive below expects "vpmaxud" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK: vpmaxud
%res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
ret <16 x i32> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32>, <16 x i32>) nounwind readonly
; Calls llvm.x86.avx512.pmaxu.q on two <8 x i64> arguments and returns the
; result. The check directive below expects "vpmaxuq" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <8 x i64> @test_x86_pmaxu_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK: vpmaxuq
%res = call <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
ret <8 x i64> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64>, <8 x i64>) nounwind readonly
; Calls llvm.x86.avx512.pmaxs.d on two <16 x i32> arguments and returns the
; result. The check directive below expects "vpmaxsd" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <16 x i32> @test_x86_pmaxs_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK: vpmaxsd
%res = call <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
ret <16 x i32> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32>, <16 x i32>) nounwind readonly
; Calls llvm.x86.avx512.pmaxs.q on two <8 x i64> arguments and returns the
; result. The check directive below expects "vpmaxsq" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <8 x i64> @test_x86_pmaxs_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK: vpmaxsq
%res = call <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
ret <8 x i64> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64>, <8 x i64>) nounwind readonly
; Calls llvm.x86.avx512.pminu.d on two <16 x i32> arguments and returns the
; result. The check directive below expects "vpminud" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <16 x i32> @test_x86_pminu_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK: vpminud
%res = call <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
ret <16 x i32> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32>, <16 x i32>) nounwind readonly
; Calls llvm.x86.avx512.pminu.q on two <8 x i64> arguments and returns the
; result. The check directive below expects "vpminuq" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <8 x i64> @test_x86_pminu_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK: vpminuq
%res = call <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
ret <8 x i64> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64>, <8 x i64>) nounwind readonly
; Calls llvm.x86.avx512.pmins.d on two <16 x i32> arguments and returns the
; result. The check directive below expects "vpminsd" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <16 x i32> @test_x86_pmins_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK: vpminsd
%res = call <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
ret <16 x i32> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32>, <16 x i32>) nounwind readonly
; Calls llvm.x86.avx512.pmins.q on two <8 x i64> arguments and returns the
; result. The check directive below expects "vpminsq" in the tool output
; (presumably llc assembly; the RUN line is not visible in this view).
define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK: vpminsq
%res = call <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
ret <8 x i64> %res
}
; NOTE(review): the intrinsic is defined with IntrNoMem, so readnone would be
; the tighter attribute; readonly is merely conservative.
declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly