From ea86423cbd3cc71a2ee2d261c25ab6c0eea0c7e0 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 29 Jun 2010 17:26:30 +0000 Subject: [PATCH] Add sqrt, rsqrt and rcp AVX instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107166 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 68 ++++++++++++++++++++++++- test/MC/AsmParser/X86/x86_32-encoding.s | 65 +++++++++++++++++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 64 +++++++++++++++++++++++ 3 files changed, 196 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0ee1216fd2a..9cc3e25a680 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1873,7 +1873,7 @@ let isCommutable = 0 in { /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s opc, string OpcodeStr, - SDNode OpNode, Intrinsic F32Int> { + SDNode OpNode, Intrinsic F32Int> { def SSr : SSI; @@ -1906,6 +1906,26 @@ multiclass sse1_fp_unop_p opc, string OpcodeStr, [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>; } +/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. +multiclass sse1_fp_unop_s_avx opc, string OpcodeStr, + SDNode OpNode, Intrinsic F32Int> { + def SSr : SSI; + def SSm : I, XS, Requires<[HasAVX, HasSSE1, OptForSize]>; + def SSr_Int : SSI; + def SSm_Int : SSI; +} + /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int> { @@ -1940,6 +1960,52 @@ multiclass sse2_fp_unop_p opc, string OpcodeStr, [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>; } +/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. +multiclass sse2_fp_unop_s_avx opc, string OpcodeStr, + SDNode OpNode, Intrinsic F64Int> { + def SDr : VSDI; + def SDm : VSDI; + def SDr_Int : VSDI; + def SDm_Int : VSDI; +} + +let isAsmParserOnly = 1 in { + // Square root. + let Predicates = [HasAVX, HasSSE2] in { + defm VSQRT : sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + VEX_4V; + + defm VSQRT : sse2_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_pd>, VEX; + } + + let Predicates = [HasAVX, HasSSE1] in { + defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, + VEX_4V; + defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ps>, VEX; + // Reciprocal approximations. Note that these typically require refinement + // in order to obtain suitable precision. + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt, + int_x86_sse_rsqrt_ss>, VEX_4V; + defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>, + VEX; + defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, + VEX_4V; + defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ps>, + VEX; + } +} + // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, sse1_fp_unop_p<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ps>, diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index d6549ea49ba..3140685691a 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -10873,3 +10873,68 @@ // CHECK: vcvtpd2ps %xmm2, %xmm3 // CHECK: encoding: [0xc5,0xf9,0x5a,0xda] vcvtpd2ps %xmm2, %xmm3 + +// CHECK: vsqrtpd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0xd1] + vsqrtpd %xmm1, %xmm2 + +// CHECK: vsqrtpd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0x10] + vsqrtpd (%eax), %xmm2 + +// CHECK: vsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0xd1] + vsqrtps %xmm1, %xmm2 + +// CHECK: vsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0x10] + vsqrtps (%eax), %xmm2 + +// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0xd9] + vsqrtsd %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0x18] + vsqrtsd (%eax), %xmm2, %xmm3 + +// CHECK: vsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0xd9] + vsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0x18] + vsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0xd1] + vrsqrtps %xmm1, %xmm2 + +// CHECK: vrsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0x10] + vrsqrtps (%eax), %xmm2 + +// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0xd9] + vrsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vrsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0x18] + vrsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrcpps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0xd1] + vrcpps %xmm1, %xmm2 + +// CHECK: vrcpps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0x10] + vrcpps (%eax), %xmm2 + +// CHECK: vrcpss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0xd9] + vrcpss %xmm1, %xmm2, %xmm3 + +// CHECK: vrcpss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0x18] + vrcpss (%eax), %xmm2, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 1907006fd7d..ddb97d49e2c 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -922,3 +922,67 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc] vcvtpd2ps %xmm12, %xmm11 +// CHECK: vsqrtpd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3] + vsqrtpd %xmm11, %xmm12 + +// CHECK: vsqrtpd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x51,0x20] + vsqrtpd (%rax), %xmm12 + +// CHECK: vsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3] + vsqrtps %xmm11, %xmm12 + +// CHECK: vsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x51,0x20] + vsqrtps (%rax), %xmm12 + +// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3] + vsqrtsd %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtsd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x51,0x10] + vsqrtsd (%rax), %xmm12, %xmm10 + +// CHECK: vsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3] + vsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x51,0x10] + vsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3] + vrsqrtps %xmm11, %xmm12 + +// CHECK: vrsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x52,0x20] + vrsqrtps (%rax), %xmm12 + +// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3] + vrsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vrsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x52,0x10] + vrsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrcpps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3] + vrcpps %xmm11, %xmm12 + +// CHECK: vrcpps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x53,0x20] + vrcpps (%rax), %xmm12 + +// CHECK: vrcpss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3] + vrcpss %xmm11, %xmm12, %xmm10 + +// CHECK: vrcpss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x53,0x10] + vrcpss (%rax), %xmm12, %xmm10 +