From 6991623dd7b8a096ccddbaedd9de3f515bacb1e2 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 13 Jul 2010 01:53:31 +0000 Subject: [PATCH] Add AVX 256-bit unop arithmetic instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108223 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 141 +++++++++++++++--------- test/MC/AsmParser/X86/x86_32-encoding.s | 32 ++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 32 ++++++ 3 files changed, 151 insertions(+), 54 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 61723db7a2a..96a82f32526 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1584,23 +1584,6 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; } -/// sse1_fp_unop_p - SSE1 unops in scalar form. -multiclass sse1_fp_unop_p opc, string OpcodeStr, - SDNode OpNode, Intrinsic V4F32Int> { - def PSr : PSI; - def PSm : PSI; - def PSr_Int : PSI; - def PSm_Int : PSI; -} - /// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. multiclass sse1_fp_unop_s_avx opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int> { @@ -1621,6 +1604,38 @@ multiclass sse1_fp_unop_s_avx opc, string OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } +/// sse1_fp_unop_p - SSE1 unops in packed form. +multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode> { + def PSr : PSI; + def PSm : PSI; +} + +/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form. +multiclass sse1_fp_unop_p_y opc, string OpcodeStr, SDNode OpNode> { + def PSYr : PSI; + def PSYm : PSI; +} + +/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. +multiclass sse1_fp_unop_p_int opc, string OpcodeStr, + Intrinsic V4F32Int> { + def PSr_Int : PSI; + def PSm_Int : PSI; +} + + /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int> { @@ -1640,23 +1655,6 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; } -/// sse2_fp_unop_p - SSE2 unops in vector forms. -multiclass sse2_fp_unop_p opc, string OpcodeStr, - SDNode OpNode, Intrinsic V2F64Int> { - def PDr : PDI; - def PDm : PDI; - def PDr_Int : PDI; - def PDm_Int : PDI; -} - /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. multiclass sse2_fp_unop_s_avx opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int> { @@ -1677,44 +1675,79 @@ multiclass sse2_fp_unop_s_avx opc, string OpcodeStr, []>; } -let isAsmParserOnly = 1 in { +/// sse2_fp_unop_p - SSE2 unops in vector forms. +multiclass sse2_fp_unop_p opc, string OpcodeStr, + SDNode OpNode> { + def PDr : PDI; + def PDm : PDI; +} + +/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms. +multiclass sse2_fp_unop_p_y opc, string OpcodeStr, SDNode OpNode> { + def PDYr : PDI; + def PDYm : PDI; +} + +/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms. +multiclass sse2_fp_unop_p_int opc, string OpcodeStr, + Intrinsic V2F64Int> { + def PDr_Int : PDI; + def PDm_Int : PDI; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { // Square root. - let Predicates = [HasAVX] in { - defm VSQRT : sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, - VEX_4V; - - defm VSQRT : sse2_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_pd>, VEX; - } - - let Predicates = [HasAVX] in { defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, - VEX_4V; - defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ps>, VEX; + sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + VEX_4V; + + defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>, + sse2_fp_unop_p<0x51, "vsqrt", fsqrt>, + sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + VEX; + // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>, VEX_4V; - defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>, - VEX; + defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>, + sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX; + defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, VEX_4V; - defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ps>, - VEX; - } + defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>, + sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX; } // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ps>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt>, + sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_pd>; + sse2_fp_unop_p<0x51, "sqrt", fsqrt>, + sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>; + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>, + sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>; defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, int_x86_sse_rcp_ps>; + sse1_fp_unop_p<0x53, "rcp", X86frcp>, + sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>; // There is no f64 version of the reciprocal approximation instructions. diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 43438deb618..177b2a37d0f 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -12502,3 +12502,35 @@ // CHECK: encoding: [0xc5,0xdd,0x59,0x30] vmulpd (%eax), %ymm4, %ymm6 +// CHECK: vsqrtpd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0xd1] + vsqrtpd %ymm1, %ymm2 + +// CHECK: vsqrtpd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0x10] + vsqrtpd (%eax), %ymm2 + +// CHECK: vsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0xd1] + vsqrtps %ymm1, %ymm2 + +// CHECK: vsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0x10] + vsqrtps (%eax), %ymm2 + +// CHECK: vrsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0xd1] + vrsqrtps %ymm1, %ymm2 + +// CHECK: vrsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0x10] + vrsqrtps (%eax), %ymm2 + +// CHECK: vrcpps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0xd1] + vrcpps %ymm1, %ymm2 + +// CHECK: vrcpps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0x10] + vrcpps (%eax), %ymm2 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 74a4d675c49..1504f6b0abf 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -2576,3 +2576,35 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc5,0xdd,0x59,0x30] vmulpd (%rax), %ymm4, %ymm6 +// CHECK: vsqrtpd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3] + vsqrtpd %ymm11, %ymm12 + +// CHECK: vsqrtpd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x51,0x20] + vsqrtpd (%rax), %ymm12 + +// CHECK: vsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3] + vsqrtps %ymm11, %ymm12 + +// CHECK: vsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x51,0x20] + vsqrtps (%rax), %ymm12 + +// CHECK: vrsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3] + vrsqrtps %ymm11, %ymm12 + +// CHECK: vrsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x52,0x20] + vrsqrtps (%rax), %ymm12 + +// CHECK: vrcpps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3] + vrcpps %ymm11, %ymm12 + +// CHECK: vrcpps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x53,0x20] + vrcpps (%rax), %ymm12 +