From 62d66cbec5b2d4e00e86457762df0127ae234e6f Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Sun, 27 Oct 2013 08:18:37 +0000
Subject: [PATCH] AVX-512: PMIN/PMAX intrinsics and patterns

Patch by Cameron McInally

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193497 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):

  * Summary: declares the eight llvm.x86.avx512.p{max,min}{u,s}.{d,q}
    intrinsics, adds them to the existing case lists in
    LowerINTRINSIC_WO_CHAIN so they select X86ISD::UMAX/UMIN/SMAX/SMIN,
    defines the matching VPMAX*/VPMIN* "Z" instructions, and adds one
    codegen test per intrinsic.

  * Changed in review: VPMAXSDZ now carries the T8 modifier like the other
    seven definitions. vpmaxsd (opcode 0x3D) lives in the 0F 38 opcode map
    per the Intel SDM; as posted, VPMAXSDZ was the only definition missing
    T8 and would have been emitted in the wrong opcode map.

  * NOTE(review): the new tests only CHECK the mnemonic, so they pass with
    or without the T8 fix. Consider adding encoding checks.

  * NOTE(review): the last avx512_binop_rm argument is 1 for the D forms
    and 0 for the Q forms. If that argument is the commutable flag, the
    Q forms presumably want 1 as well -- confirm against the multiclass
    definition. Left unchanged here.

  * NOTE(review): leading whitespace in this copy was reconstructed; if a
    context line does not match the tree, apply with --ignore-whitespace.

 include/llvm/IR/IntrinsicsX86.td      | 26 +++++++++++++
 lib/Target/X86/X86ISelLowering.cpp    | 18 ++++++++-
 lib/Target/X86/X86InstrAVX512.td      | 28 ++++++++++++++
 test/CodeGen/X86/avx512-intrinsics.ll | 56 +++++++++++++++++++++++++++
 4 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 537089d4e34..e39eea8da48 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -2719,6 +2719,32 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
             Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
                        llvm_v8f64_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud512">,
+            Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+                       llvm_v16i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pmaxu_q : GCCBuiltin<"__builtin_ia32_pmaxuq512">,
+            Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+                       llvm_v8i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd512">,
+            Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+                       llvm_v16i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pmaxs_q : GCCBuiltin<"__builtin_ia32_pmaxsq512">,
+            Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+                       llvm_v8i64_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_pminu_d : GCCBuiltin<"__builtin_ia32_pminud512">,
+            Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+                       llvm_v16i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pminu_q : GCCBuiltin<"__builtin_ia32_pminuq512">,
+            Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+                       llvm_v8i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd512">,
+            Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+                       llvm_v16i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pmins_q : GCCBuiltin<"__builtin_ia32_pminsq512">,
+            Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+                       llvm_v8i64_ty], [IntrNoMem]>;
 }
 
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0d7818943c8..5dbef0f6fc3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11173,24 +11173,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
   case Intrinsic::x86_avx2_pmaxu_b:
   case Intrinsic::x86_avx2_pmaxu_w:
   case Intrinsic::x86_avx2_pmaxu_d:
+  case Intrinsic::x86_avx512_pmaxu_d:
+  case Intrinsic::x86_avx512_pmaxu_q:
   case Intrinsic::x86_sse2_pminu_b:
   case Intrinsic::x86_sse41_pminuw:
   case Intrinsic::x86_sse41_pminud:
   case Intrinsic::x86_avx2_pminu_b:
   case Intrinsic::x86_avx2_pminu_w:
   case Intrinsic::x86_avx2_pminu_d:
+  case Intrinsic::x86_avx512_pminu_d:
+  case Intrinsic::x86_avx512_pminu_q:
   case Intrinsic::x86_sse41_pmaxsb:
   case Intrinsic::x86_sse2_pmaxs_w:
   case Intrinsic::x86_sse41_pmaxsd:
   case Intrinsic::x86_avx2_pmaxs_b:
   case Intrinsic::x86_avx2_pmaxs_w:
   case Intrinsic::x86_avx2_pmaxs_d:
+  case Intrinsic::x86_avx512_pmaxs_d:
+  case Intrinsic::x86_avx512_pmaxs_q:
   case Intrinsic::x86_sse41_pminsb:
   case Intrinsic::x86_sse2_pmins_w:
   case Intrinsic::x86_sse41_pminsd:
   case Intrinsic::x86_avx2_pmins_b:
   case Intrinsic::x86_avx2_pmins_w:
-  case Intrinsic::x86_avx2_pmins_d: {
+  case Intrinsic::x86_avx2_pmins_d:
+  case Intrinsic::x86_avx512_pmins_d:
+  case Intrinsic::x86_avx512_pmins_q: {
     unsigned Opcode;
     switch (IntNo) {
     default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
@@ -11200,6 +11208,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
     case Intrinsic::x86_avx2_pmaxu_b:
     case Intrinsic::x86_avx2_pmaxu_w:
     case Intrinsic::x86_avx2_pmaxu_d:
+    case Intrinsic::x86_avx512_pmaxu_d:
+    case Intrinsic::x86_avx512_pmaxu_q:
       Opcode = X86ISD::UMAX;
       break;
     case Intrinsic::x86_sse2_pminu_b:
@@ -11208,6 +11218,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
     case Intrinsic::x86_avx2_pminu_b:
     case Intrinsic::x86_avx2_pminu_w:
     case Intrinsic::x86_avx2_pminu_d:
+    case Intrinsic::x86_avx512_pminu_d:
+    case Intrinsic::x86_avx512_pminu_q:
       Opcode = X86ISD::UMIN;
       break;
     case Intrinsic::x86_sse41_pmaxsb:
@@ -11216,6 +11228,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
     case Intrinsic::x86_avx2_pmaxs_b:
     case Intrinsic::x86_avx2_pmaxs_w:
     case Intrinsic::x86_avx2_pmaxs_d:
+    case Intrinsic::x86_avx512_pmaxs_d:
+    case Intrinsic::x86_avx512_pmaxs_q:
       Opcode = X86ISD::SMAX;
       break;
     case Intrinsic::x86_sse41_pminsb:
@@ -11224,6 +11238,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
     case Intrinsic::x86_avx2_pmins_b:
     case Intrinsic::x86_avx2_pmins_w:
     case Intrinsic::x86_avx2_pmins_d:
+    case Intrinsic::x86_avx512_pmins_d:
+    case Intrinsic::x86_avx512_pmins_q:
       Opcode = X86ISD::SMIN;
       break;
     }
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index ed90a4bca3b..1ac563010fd 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1599,6 +1599,34 @@ defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
 
 def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
           (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
 
+defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
+                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
+                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
+                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
+                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
+                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
+                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
+                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
+                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - Unpack Instructions
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 5cd7311eebe..29b508302d0 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -233,3 +233,59 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
   ret <8 x double> %res
 }
 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
+
+define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpmaxud
+  %res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmaxu_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpmaxuq
+  %res = call <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pmaxs_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpmaxsd
+  %res = call <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmaxs_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpmaxsq
+  %res = call <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pminu_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpminud
+  %res = call <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pminu_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpminuq
+  %res = call <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pmins_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpminsd
+  %res = call <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpminsq
+  %res = call <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly