From cfbf0ed8b03bbff5b4045c97dca3f93a4e6b834d Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 12 Jul 2010 20:46:04 +0000 Subject: [PATCH] Apply the SSE dependence idiom for SSE unary operations to SD instructions too, in addition to SS instructions. And add a comment about it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108191 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 10 ++++++-- test/CodeGen/X86/break-sse-dep.ll | 41 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8e5f37c867b..0d5d1b449e7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1937,6 +1937,10 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, def SSr : SSI; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. def SSm : I, XS, @@ -1992,9 +1996,11 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, def SDr : SDI; - def SDm : SDI; + [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD, + Requires<[HasSSE2, OptForSize]>; def SDr_Int : SDI; diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll index 027d2f1dafc..094cbc7bdef 100644 --- a/test/CodeGen/X86/break-sse-dep.ll +++ b/test/CodeGen/X86/break-sse-dep.ll @@ -19,3 +19,44 @@ entry: %1 = fptrunc double %0 to float ret float %1 } + +define float @squirtf(float* %x) nounwind { +entry: +; CHECK: squirtf: +; CHECK: movss (%rdi), %xmm0 +; CHECK: sqrtss %xmm0, %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt(double* %x) nounwind { +entry: +; CHECK: squirt: +; CHECK: movsd (%rdi), %xmm0 +; CHECK: sqrtsd %xmm0, %xmm0 + %z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +define float @squirtf_size(float* %x) nounwind optsize { +entry: +; CHECK: squirtf_size: +; CHECK: sqrtss (%rdi), %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt_size(double* %x) nounwind optsize { +entry: +; CHECK: squirt_size: +; CHECK: sqrtsd (%rdi), %xmm0 + %z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double)