From 975e9b99aa0b3bbd9480a9e51a3d99f2242cd2ed Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 1 Mar 2015 07:44:04 +0000 Subject: [PATCH] AVX-512: Added mask and rounding mode for scalar arithmetics Added more tests for scalar instructions to distinguish between AVX and AVX-512 forms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230891 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 99 ++++++++++++++++++++---- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 + lib/Target/X86/X86InstrSSE.td | 4 +- test/CodeGen/X86/avx512-scalar.ll | 92 ++++++++++++++++++++++ utils/TableGen/X86DisassemblerTables.cpp | 12 +++ 5 files changed, 191 insertions(+), 18 deletions(-) create mode 100644 test/CodeGen/X86/avx512-scalar.ll diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 4923bc5f1dd..f778ff0ea0a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3243,28 +3243,95 @@ defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic //===----------------------------------------------------------------------===// +multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, + SDNode OpNode, SDNode VecNode, OpndItins itins, + bit IsCommutable> { -multiclass avx512_binop_s opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG, - EVEX_CD8<64, CD8VT1>; + defm rr_Int : AVX512_maskable_scalar; + + defm rm_Int : AVX512_maskable_scalar; + let isCodeGenOnly = 1, isCommutable = IsCommutable, + Predicates = [HasAVX512] in { + def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.FRC:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], + 
itins.rr>; + def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src2)))], itins.rr>; + } } -let isCommutable = 1 in { -defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>; -defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>; -defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>; -defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>; +multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, + SDNode VecNode, OpndItins itins, bit IsCommutable> { + + defm rrb : AVX512_maskable_scalar, + EVEX_B, EVEX_RC; } -let isCommutable = 0 in { -defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>; -defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>; +multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, + SDNode VecNode, OpndItins itins, bit IsCommutable> { + + defm rrb : AVX512_maskable_scalar, EVEX_B; } +multiclass avx512_binop_s_round opc, string OpcodeStr, SDNode OpNode, + SDNode VecNode, + SizeItins itins, bit IsCommutable> { + defm SSZ : avx512_fp_scalar, + avx512_fp_scalar_round, + XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp_scalar, + avx512_fp_scalar_round, + XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; +} + +multiclass avx512_binop_s_sae opc, string OpcodeStr, SDNode OpNode, + SDNode VecNode, + SizeItins itins, bit IsCommutable> { + defm SSZ : avx512_fp_scalar, + avx512_fp_scalar_sae, + XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp_scalar, + avx512_fp_scalar_sae, + XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; +} +defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>; +defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>; +defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, 
SSE_ALU_ITINS_S, 0>; +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>; +defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 1>; +defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 1>; + multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> { defm rr: AVX512_maskable; def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; +def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>; +def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 127d3e9a1d9..8e8bdb60d2b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3567,7 +3567,7 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SS : avx_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, - itins, HasAVX, "SS">, XS, VEX_4V, VEX_LIG; + itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG; } multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, @@ -3579,7 +3579,7 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SD : avx_fp_unop_s("int_x86_sse2_"##OpcodeStr##_sd), - OpNode, itins, HasAVX, "SD">, XD, VEX_4V, VEX_LIG; + OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG; } // Square root. 
diff --git a/test/CodeGen/X86/avx512-scalar.ll b/test/CodeGen/X86/avx512-scalar.ll new file mode 100644 index 00000000000..644fda41575 --- /dev/null +++ b/test/CodeGen/X86/avx512-scalar.ll @@ -0,0 +1,92 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s --check-prefix AVX512 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx --show-mc-encoding | FileCheck %s --check-prefix AVX + +; AVX512-LABEL: @test_fdiv +; AVX512: vdivss %xmm{{.*}} ## encoding: [0x62 +; AVX-LABEL: @test_fdiv +; AVX: vdivss %xmm{{.*}} ## encoding: [0xc5 + +define float @test_fdiv(float %a, float %b) { + %c = fdiv float %a, %b + ret float %c +} + +; AVX512-LABEL: @test_fsub +; AVX512: vsubss %xmm{{.*}} ## encoding: [0x62 +; AVX-LABEL: @test_fsub +; AVX: vsubss %xmm{{.*}} ## encoding: [0xc5 + +define float @test_fsub(float %a, float %b) { + %c = fsub float %a, %b + ret float %c +} + +; AVX512-LABEL: @test_fadd +; AVX512: vaddsd %xmm{{.*}} ## encoding: [0x62 +; AVX-LABEL: @test_fadd +; AVX: vaddsd %xmm{{.*}} ## encoding: [0xc5 + +define double @test_fadd(double %a, double %b) { + %c = fadd double %a, %b + ret double %c +} + +declare float @llvm.trunc.f32(float %Val) +declare double @llvm.trunc.f64(double %Val) +declare float @llvm.rint.f32(float %Val) +declare double @llvm.rint.f64(double %Val) +declare double @llvm.sqrt.f64(double %Val) +declare float @llvm.sqrt.f32(float %Val) + +; AVX512-LABEL: @test_trunc +; AVX512: vrndscaless +; AVX-LABEL: @test_trunc +; AVX: vroundss + +define float @test_trunc(float %a) { + %c = call float @llvm.trunc.f32(float %a) + ret float %c +} + +; AVX512-LABEL: @test_sqrt +; AVX512: vsqrtsd %xmm{{.*}} ## encoding: [0x62 +; AVX-LABEL: @test_sqrt +; AVX: vsqrtsd %xmm{{.*}} ## encoding: [0xc5 + +define double @test_sqrt(double %a) { + %c = call double @llvm.sqrt.f64(double %a) + ret double %c +} + +; AVX512-LABEL: @test_rint +; AVX512: vrndscaless +; AVX-LABEL: @test_rint +; AVX: vroundss + +define float 
@test_rint(float %a) { + %c = call float @llvm.rint.f32(float %a) + ret float %c +} + +; AVX512-LABEL: @test_vmax +; AVX512: vmaxss %xmm{{.*}} ## encoding: [0x62 +; AVX-LABEL: @test_vmax +; AVX: vmaxss %xmm{{.*}} ## encoding: [0xc5 + +define float @test_vmax(float %i, float %j) { + %cmp_res = fcmp ogt float %i, %j + %max = select i1 %cmp_res, float %i, float %j + ret float %max +} + +; AVX512-LABEL: @test_mov +; AVX512: vcmpltss %xmm{{.*}} ## encoding: [0x62 +; AVX-LABEL: @test_mov +; AVX: vcmpltss %xmm{{.*}} ## encoding: [0xc5 + +define float @test_mov(float %a, float %b, float %i, float %j) { + %cmp_res = fcmp ogt float %i, %j + %max = select i1 %cmp_res, float %b, float %a + ret float %max +} + diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index fbe5502bc90..e7e292de824 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -215,11 +215,17 @@ static inline bool inheritsFrom(InstructionContext child, return inheritsFrom(child, IC_EVEX_W_K) || inheritsFrom(child, IC_EVEX_L_W_K); case IC_EVEX_XS_K: + case IC_EVEX_XS_K_B: + case IC_EVEX_XS_KZ_B: return inheritsFrom(child, IC_EVEX_W_XS_K) || inheritsFrom(child, IC_EVEX_L_W_XS_K); case IC_EVEX_XD_K: + case IC_EVEX_XD_K_B: + case IC_EVEX_XD_KZ_B: return inheritsFrom(child, IC_EVEX_W_XD_K) || inheritsFrom(child, IC_EVEX_L_W_XD_K); + case IC_EVEX_XS_B: + case IC_EVEX_XD_B: case IC_EVEX_K_B: case IC_EVEX_KZ: return false; @@ -253,6 +259,12 @@ static inline bool inheritsFrom(InstructionContext child, case IC_EVEX_W_KZ: case IC_EVEX_W_XS_KZ: case IC_EVEX_W_XD_KZ: + case IC_EVEX_W_XS_B: + case IC_EVEX_W_XD_B: + case IC_EVEX_W_XS_K_B: + case IC_EVEX_W_XD_K_B: + case IC_EVEX_W_XS_KZ_B: + case IC_EVEX_W_XD_KZ_B: case IC_EVEX_W_OPSIZE_KZ: case IC_EVEX_W_OPSIZE_KZ_B: return false;