mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-09 10:31:14 +00:00
4711d5cda3
Use the same COPY_TO_REGCLASS approach as for the 2-register *_sfp instructions. This change made a big difference in the code generated for the CodeGen/Thumb2/cross-rc-coalescing-2.ll test: The coalescer is still doing a fine job, but some instructions that were previously moved outside the loop are not moved now. It's using fewer VFP registers now, which is generally a good thing, so I think the estimates for register pressure changed and that affected the LICM behavior. Since that isn't obviously wrong, I've just changed the test file. This completes the work for Radar 8711675. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121730 91177308-0d34-0410-b5e6-96231b3b80d8
76 lines
3.8 KiB
LLVM
76 lines
3.8 KiB
LLVM
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
|
|
|
|
; @fht -- reduced codegen test input; the FileCheck directives embedded below
; are matched against the llc output for this function.
;
; What the directives pin, in order: the fht label, the %bb5 label, a bne,
; then a vmov.f32, then the %bb8 label, and finally a blt with no vmov.f32
; anywhere between the %bb8 label and that blt.
;
; Control flow: entry -> bb5 (self-loop) -> bb.nph -> bb7 -> bb8 (self-loop)
; -> bb9 -> bb7. There is no ret; bb9 always branches back to bb7.
; The %n argument is never referenced in the body.
;
; NOTE(review): the bare `|` lines throughout are not LLVM IR; they look like
; table residue from a web view of the file -- confirm against the upstream
; copy before trying to run this through llc.
define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
|
|
; CHECK: fht:
|
|
entry:
|
|
br label %bb5
|
|
|
|
; First loop: a single block that branches back to itself on an undef condition.
bb5: ; preds = %bb5, %entry
|
|
; CHECK: %bb5
|
|
; CHECK: bne
|
|
br i1 undef, label %bb5, label %bb.nph
|
|
|
|
bb.nph: ; preds = %bb5
|
|
br label %bb7
|
|
|
|
; Loop preheader
|
|
; CHECK: vmov.f32
|
|
; Outer loop header. %s1.02 is undef when entered from bb.nph and takes %35
; on the back-edge from bb9.
bb7: ; preds = %bb9, %bb.nph
|
|
%s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3]
|
|
%tmp79 = add i32 undef, undef ; <i32> [#uses=1]
|
|
%tmp53 = sub i32 undef, undef ; <i32> [#uses=1]
|
|
; %0 and %1 are defined here, outside bb8, and their only users are inside bb8.
%0 = fadd float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2]
|
|
%1 = fmul float 0.000000e+00, 0.000000e+00 ; <float> [#uses=2]
|
|
br label %bb8
|
|
|
|
; Inner loop: a single block whose back-edge is taken while %34 is true.
; The directives below require that no vmov.f32 is emitted between the
; %bb8 label and the blt in the generated code.
bb8: ; preds = %bb8, %bb7
|
|
; CHECK: %bb8
|
|
; CHECK-NOT: vmov.f32
|
|
; CHECK: blt
|
|
; %tmp54 has no users; %tmp80 only feeds the %scevgep81 address computation.
%tmp54 = add i32 0, %tmp53 ; <i32> [#uses=0]
|
|
%fi.1 = getelementptr float* %fz, i32 undef ; <float*> [#uses=2]
|
|
%tmp80 = add i32 0, %tmp79 ; <i32> [#uses=1]
|
|
%scevgep81 = getelementptr float* %fz, i32 %tmp80 ; <float*> [#uses=1]
|
|
%2 = load float* undef, align 4 ; <float> [#uses=1]
|
|
%3 = fmul float %2, %1 ; <float> [#uses=1]
|
|
%4 = load float* null, align 4 ; <float> [#uses=2]
|
|
%5 = fmul float %4, %0 ; <float> [#uses=1]
|
|
%6 = fsub float %3, %5 ; <float> [#uses=1]
|
|
%7 = fmul float %4, %1 ; <float> [#uses=1]
|
|
%8 = fadd float undef, %7 ; <float> [#uses=2]
|
|
%9 = load float* %fi.1, align 4 ; <float> [#uses=2]
|
|
%10 = fsub float %9, %8 ; <float> [#uses=1]
|
|
%11 = fadd float %9, %8 ; <float> [#uses=1]
|
|
%12 = fsub float 0.000000e+00, %6 ; <float> [#uses=1]
|
|
%13 = fsub float 0.000000e+00, undef ; <float> [#uses=2]
|
|
%14 = fmul float undef, %0 ; <float> [#uses=1]
|
|
%15 = fadd float %14, undef ; <float> [#uses=2]
|
|
%16 = load float* %scevgep81, align 4 ; <float> [#uses=2]
|
|
%17 = fsub float %16, %15 ; <float> [#uses=1]
|
|
%18 = fadd float %16, %15 ; <float> [#uses=2]
|
|
%19 = load float* undef, align 4 ; <float> [#uses=2]
|
|
%20 = fsub float %19, %13 ; <float> [#uses=2]
|
|
%21 = fadd float %19, %13 ; <float> [#uses=1]
|
|
; From here on the loop-carried %s1.02 is combined with the values computed above.
%22 = fmul float %s1.02, %18 ; <float> [#uses=1]
|
|
%23 = fmul float 0.000000e+00, %20 ; <float> [#uses=1]
|
|
%24 = fsub float %22, %23 ; <float> [#uses=1]
|
|
%25 = fmul float 0.000000e+00, %18 ; <float> [#uses=1]
|
|
%26 = fmul float %s1.02, %20 ; <float> [#uses=1]
|
|
%27 = fadd float %25, %26 ; <float> [#uses=1]
|
|
%28 = fadd float %11, %27 ; <float> [#uses=1]
|
|
; Three results are stored: %28 to %fi.1, %29 to null, and %33 to undef.
store float %28, float* %fi.1, align 4
|
|
%29 = fadd float %12, %24 ; <float> [#uses=1]
|
|
store float %29, float* null, align 4
|
|
%30 = fmul float 0.000000e+00, %21 ; <float> [#uses=1]
|
|
%31 = fmul float %s1.02, %17 ; <float> [#uses=1]
|
|
%32 = fsub float %30, %31 ; <float> [#uses=1]
|
|
%33 = fsub float %10, %32 ; <float> [#uses=1]
|
|
store float %33, float* undef, align 4
|
|
; Signed less-than compare on undef operands decides the inner back-edge.
%34 = icmp slt i32 undef, undef ; <i1> [#uses=1]
|
|
br i1 %34, label %bb8, label %bb9
|
|
|
|
; Outer loop latch: produces the next %s1.02 value and jumps back to bb7.
bb9: ; preds = %bb8
|
|
%35 = fadd float 0.000000e+00, undef ; <float> [#uses=1]
|
|
br label %bb7
|
|
}
|