Apply the SSE dependence idiom for SSE unary operations to SD instructions
too, in addition to SS instructions. And add a comment about it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108191 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-13 04:30:23 +00:00 · 2010-07-12 20:46:04 +00:00 · 2010-07-12 20:46:04 +00:00 · cfbf0ed8b0
commit cfbf0ed8b0
parent 4d5fe97c47
2 changed files with 49 additions and 2 deletions
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1937,6 +1937,10 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]>;
+  // For scalar unary operations, fold a load into the operation
+  // only in OptForSize mode. It eliminates an instruction, but it also
+  // eliminates a whole-register clobber (the load), so it introduces a
+  // partial register update condition.
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
@@ -1992,9 +1996,11 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))]>;
-  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+  // See the comments in sse1_fp_unop_s for why this is OptForSize.
+  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+                [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+            Requires<[HasSSE2, OptForSize]>;
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))]>;
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -19,3 +19,44 @@ entry:
  %1 = fptrunc double %0 to float
  ret float %1
 }
+
+define float @squirtf(float* %x) nounwind {
+entry:
+; CHECK: squirtf:
+; CHECK: movss (%rdi), %xmm0
+; CHECK: sqrtss %xmm0, %xmm0
+  %z = load float* %x
+  %t = call float @llvm.sqrt.f32(float %z)
+  ret float %t
+}
+
+define double @squirt(double* %x) nounwind {
+entry:
+; CHECK: squirt:
+; CHECK: movsd (%rdi), %xmm0
+; CHECK: sqrtsd %xmm0, %xmm0
+  %z = load double* %x
+  %t = call double @llvm.sqrt.f64(double %z)
+  ret double %t
+}
+
+define float @squirtf_size(float* %x) nounwind optsize {
+entry:
+; CHECK: squirtf_size:
+; CHECK: sqrtss (%rdi), %xmm0
+  %z = load float* %x
+  %t = call float @llvm.sqrt.f32(float %z)
+  ret float %t
+}
+
+define double @squirt_size(double* %x) nounwind optsize {
+entry:
+; CHECK: squirt_size:
+; CHECK: sqrtsd (%rdi), %xmm0
+  %z = load double* %x
+  %t = call double @llvm.sqrt.f64(double %z)
+  ret double %t
+}
+
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)