llvm-6502/test/CodeGen/X86/break-sse-dep.ll
Evan Cheng 400073d546 On recent Intel u-arch's, folding loads into some unary SSE instructions can
be non-optimal. To be precise, we should avoid folding loads if the instructions
only update part of the destination register, and the non-updated part is not
needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these instructions breaks
the partial register dependency and it can improve performance. e.g.

movss (%rdi), %xmm0
cvtss2sd %xmm0, %xmm0

instead of
cvtss2sd (%rdi), %xmm0

An alternative method to break dependency is to clear the register first. e.g.
xorps %xmm0, %xmm0
cvtss2sd (%rdi), %xmm0


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91672 91177308-0d34-0410-b5e6-96231b3b80d8
2009-12-18 07:40:29 +00:00

29 lines
703 B
LLVM

; Exercises the break-sse-dep subtarget feature on x86-64 with SSE2.
; The file is compiled twice: once with the feature enabled (checked with the
; YES prefix) and once with it disabled (checked with the NO prefix). The
; enabled run is expected to keep the load as a separate movss/movsd; the
; disabled run is expected to fold the load into the convert instruction.
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES
; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO
; t1 loads a float through %x and extends it to double (fpext, i.e. cvtss2sd).
; Feature enabled:  the load stays a separate movss and cvtss2sd works
;                   register-to-register.
; Feature disabled: the load is folded into cvtss2sd as a memory operand.
define double @t1(float* nocapture %x) nounwind readonly ssp {
entry:
; YES: t1:
; YES: movss (%rdi), %xmm0
; YES: cvtss2sd %xmm0, %xmm0
; NO: t1:
; NO: cvtss2sd (%rdi), %xmm0
  %0 = load float* %x, align 4
  %1 = fpext float %0 to double
  ret double %1
}
; t2 loads a double through %x and truncates it to float (fptrunc, i.e.
; cvtsd2ss).
; Feature enabled:  the load stays a separate movsd and cvtsd2ss works
;                   register-to-register.
; Feature disabled: the load is folded into cvtsd2ss as a memory operand.
define float @t2(double* nocapture %x) nounwind readonly ssp {
entry:
; YES: t2:
; YES: movsd (%rdi), %xmm0
; YES: cvtsd2ss %xmm0, %xmm0
; NO: t2:
; NO: cvtsd2ss (%rdi), %xmm0
  %0 = load double* %x, align 8
  %1 = fptrunc double %0 to float
  ret float %1
}