This is a reimplementation of D9780 at the machine instruction level rather than the DAG.

Use the MachineCombiner pass to reassociate scalar single-precision AVX additions (just a starting point; see the TODO comments) to increase ILP when it's safe to do so. The code is closely based on the existing MachineCombiner optimization that is implemented for AArch64.

This patch should not cause the kind of spilling tragedy that led to the reversion of r236031.

Differential Revision: http://reviews.llvm.org/D10321

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239486 91177308-0d34-0410-b5e6-96231b3b80d8
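A minimal sketch of the effect (modeled on the reassociate_adds tests in the file below, not an excerpt from the patch; %s0 is an illustrative temporary, and the rewrite is only valid under the unsafe FP math that the RUN line enables):

; Before: a left-leaning chain; each fadd waits on the previous result (3 adds deep).
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3

; After reassociation: %t0 and %s0 have no dependence on each other (2 adds deep),
; so the first two adds can issue in parallel.
%t0 = fadd float %x0, %x1
%s0 = fadd float %x2, %x3
%t2 = fadd float %t0, %s0
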
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s

define float @test1(float %a) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fadd float %a, %a
  %r = fadd float %t1, %t1
  ret float %r
}

define float @test2(float %a) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float 4.0, %a
  %t2 = fadd float %a, %a
  %r = fadd float %t1, %t2
  ret float %r
}

define float @test3(float %a) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float %a, 4.0
  %t2 = fadd float %a, %a
  %r = fadd float %t1, %t2
  ret float %r
}

define float @test4(float %a) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fadd float %a, %a
  %t2 = fmul float 4.0, %a
  %r = fadd float %t1, %t2
  ret float %r
}

define float @test5(float %a) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fadd float %a, %a
  %t2 = fmul float %a, 4.0
  %r = fadd float %t1, %t2
  ret float %r
}

define float @test6(float %a) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float 2.0, %a
  %t2 = fadd float %a, %a
  %r = fsub float %t1, %t2
  ret float %r
}

define float @test7(float %a) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float %a, 2.0
  %t2 = fadd float %a, %a
  %r = fsub float %t1, %t2
  ret float %r
}

define float @test8(float %a) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    retq
  %t1 = fmul float %a, 0.0
  %t2 = fadd float %a, %t1
  ret float %t2
}

define float @test9(float %a) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    retq
  %t1 = fmul float 0.0, %a
  %t2 = fadd float %t1, %a
  ret float %t2
}

define float @test10(float %a) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fsub float -0.0, %a
  %t2 = fadd float %a, %t1
  ret float %t2
}

define float @test11(float %a) {
; CHECK-LABEL: test11:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fsub float -0.0, %a
  %t2 = fadd float %t1, %a
  ret float %t2
}

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
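; In the expected asm below, the first two vaddss instructions write %xmm0 and
; %xmm1 from disjoint inputs (%xmm0+%xmm1 and %xmm2+%xmm3), so they can issue
; in parallel; the third vaddss then combines the two results.
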
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
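; For these eight operands, the serial chain in the IR is seven adds deep; the
; pairwise form produced below is four adds deep; a fully balanced tree would
; be three adds deep. (An illustrative aside derived from the instruction
; sequence below, not a claim from the patch.)
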
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-LABEL: reassociate_adds5:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm7, %xmm6, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}