mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-10-12 15:24:21 +00:00
400073d546
On recent Intel u-arch's, folding loads into some unary SSE instructions can be non-optimal. To be precise, we should avoid folding loads if the instructions only update part of the destination register, and the non-updated part is not needed, e.g. cvtss2sd, sqrtss. Unfolding the load from these instructions breaks the partial register dependency and it can improve performance, e.g. "movss (%rdi), %xmm0; cvtss2sd %xmm0, %xmm0" instead of "cvtss2sd (%rdi), %xmm0". An alternative method to break the dependency is to clear the register first, e.g. "xorps %xmm0, %xmm0; cvtss2sd (%rdi), %xmm0". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91672 91177308-0d34-0410-b5e6-96231b3b80d8
29 lines
703 B
LLVM
29 lines
703 B
LLVM
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES
|
|
; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO
|
|
|
|
; t1 loads a float through %x and widens it to double (fpext).
;   * Prefix YES (+break-sse-dep): the load is expected to stay a separate
;     movss, followed by a register-to-register cvtss2sd.
;   * Prefix NO (-break-sse-dep): the load is expected to be folded into
;     the memory operand of cvtss2sd.
; Every directive below must be spelled "<prefix>" followed by a colon;
; FileCheck silently skips any other spelling, so the conversion
; instruction would go unchecked.
define double @t1(float* nocapture %x) nounwind readonly ssp {
entry:
; YES: t1:
; YES: movss (%rdi), %xmm0
; YES: cvtss2sd %xmm0, %xmm0

; NO: t1:
; NO: cvtss2sd (%rdi), %xmm0
  %0 = load float* %x, align 4
  %1 = fpext float %0 to double
  ret double %1
}
|
|
|
|
; t2 loads a double through %x and narrows it to float (fptrunc).
;   * Prefix YES (+break-sse-dep): the load is expected to stay a separate
;     movsd, followed by a register-to-register cvtsd2ss.
;   * Prefix NO (-break-sse-dep): the load is expected to be folded into
;     the memory operand of cvtsd2ss.
; Every directive below must be spelled "<prefix>" followed by a colon;
; FileCheck silently skips any other spelling, so the conversion
; instruction would go unchecked.
define float @t2(double* nocapture %x) nounwind readonly ssp {
entry:
; YES: t2:
; YES: movsd (%rdi), %xmm0
; YES: cvtsd2ss %xmm0, %xmm0

; NO: t2:
; NO: cvtsd2ss (%rdi), %xmm0
  %0 = load double* %x, align 8
  %1 = fptrunc double %0 to float
  ret float %1
}
|