llvm-6502/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
Bob Wilson 4711d5cda3 Remove the rest of the *_sfp Neon instruction patterns.
Use the same COPY_TO_REGCLASS approach as for the 2-register *_sfp instructions.
This change made a big difference in the code generated for the
CodeGen/Thumb2/cross-rc-coalescing-2.ll test: The coalescer is still doing
a fine job, but some instructions that were previously moved outside the loop
are not moved now.  It's using fewer VFP registers now, which is generally
a good thing, so I think the estimates for register pressure changed and that
affected the LICM behavior.  Since that isn't obviously wrong, I've just
changed the test file.  This completes the work for Radar 8711675.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121730 91177308-0d34-0410-b5e6-96231b3b80d8
2010-12-13 23:02:37 +00:00

76 lines
3.8 KiB
LLVM

; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
; Test input function for this llc/FileCheck codegen test.
; Control flow: entry -> bb5 (self-loop) -> bb.nph -> bb7 -> bb8 (self-loop)
; -> bb9 -> bb7, i.e. bb7/bb9 form an outer loop around the inner loop bb8.
; Many operands are undef or null, so this looks like a reduced test case
; (presumably machine-reduced -- TODO confirm). The %n argument is never
; referenced in the body.
; The check directives interleaved below expect, in output order: the fht
; label, the %bb5 block with a bne, a vmov.f32, then the %bb8 block with no
; vmov.f32 between its label and its blt.
define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
; CHECK: fht:
entry:
br label %bb5
bb5: ; preds = %bb5, %entry
; CHECK: %bb5
; CHECK: bne
; Branch condition is undef: the bb5 self-loop has no defined trip count.
br i1 undef, label %bb5, label %bb.nph
bb.nph: ; preds = %bb5
br label %bb7
; Loop preheader
; CHECK: vmov.f32
bb7: ; preds = %bb9, %bb.nph
; Outer-loop header. %s1.02 is undef on entry from bb.nph and takes %35 from
; bb9 on every later iteration.
%s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3]
%tmp79 = add i32 undef, undef ; <i32> [#uses=1]
%tmp53 = sub i32 undef, undef ; <i32> [#uses=1]
; %0 and %1 are built from float constants here and only used inside bb8.
%0 = fadd float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2]
%1 = fmul float 0.000000e+00, 0.000000e+00 ; <float> [#uses=2]
br label %bb8
bb8: ; preds = %bb8, %bb7
; Inner loop body. The directives below require that no vmov.f32 appears
; between the %bb8 label and the blt in the generated code.
; CHECK: %bb8
; CHECK-NOT: vmov.f32
; CHECK: blt
%tmp54 = add i32 0, %tmp53 ; <i32> [#uses=0]
; Two addresses derived from %fz: %fi.1 (undef index) and %scevgep81
; (index %tmp80, which comes from %tmp79 computed in bb7).
%fi.1 = getelementptr float* %fz, i32 undef ; <float*> [#uses=2]
%tmp80 = add i32 0, %tmp79 ; <i32> [#uses=1]
%scevgep81 = getelementptr float* %fz, i32 %tmp80 ; <float*> [#uses=1]
; Float loads (from undef, null, %fi.1 and %scevgep81) combined with the
; bb7 values %0, %1 and the loop-carried %s1.02.
%2 = load float* undef, align 4 ; <float> [#uses=1]
%3 = fmul float %2, %1 ; <float> [#uses=1]
%4 = load float* null, align 4 ; <float> [#uses=2]
%5 = fmul float %4, %0 ; <float> [#uses=1]
%6 = fsub float %3, %5 ; <float> [#uses=1]
%7 = fmul float %4, %1 ; <float> [#uses=1]
%8 = fadd float undef, %7 ; <float> [#uses=2]
%9 = load float* %fi.1, align 4 ; <float> [#uses=2]
%10 = fsub float %9, %8 ; <float> [#uses=1]
%11 = fadd float %9, %8 ; <float> [#uses=1]
%12 = fsub float 0.000000e+00, %6 ; <float> [#uses=1]
%13 = fsub float 0.000000e+00, undef ; <float> [#uses=2]
%14 = fmul float undef, %0 ; <float> [#uses=1]
%15 = fadd float %14, undef ; <float> [#uses=2]
%16 = load float* %scevgep81, align 4 ; <float> [#uses=2]
%17 = fsub float %16, %15 ; <float> [#uses=1]
%18 = fadd float %16, %15 ; <float> [#uses=2]
%19 = load float* undef, align 4 ; <float> [#uses=2]
%20 = fsub float %19, %13 ; <float> [#uses=2]
%21 = fadd float %19, %13 ; <float> [#uses=1]
%22 = fmul float %s1.02, %18 ; <float> [#uses=1]
%23 = fmul float 0.000000e+00, %20 ; <float> [#uses=1]
%24 = fsub float %22, %23 ; <float> [#uses=1]
%25 = fmul float 0.000000e+00, %18 ; <float> [#uses=1]
%26 = fmul float %s1.02, %20 ; <float> [#uses=1]
%27 = fadd float %25, %26 ; <float> [#uses=1]
%28 = fadd float %11, %27 ; <float> [#uses=1]
; Three results are stored: %28 to %fi.1, %29 to null, %33 to undef.
store float %28, float* %fi.1, align 4
%29 = fadd float %12, %24 ; <float> [#uses=1]
store float %29, float* null, align 4
%30 = fmul float 0.000000e+00, %21 ; <float> [#uses=1]
%31 = fmul float %s1.02, %17 ; <float> [#uses=1]
%32 = fsub float %30, %31 ; <float> [#uses=1]
%33 = fsub float %10, %32 ; <float> [#uses=1]
store float %33, float* undef, align 4
; Inner-loop back-edge condition is a signed less-than compare of two undef
; values, so the trip count is unspecified.
%34 = icmp slt i32 undef, undef ; <i1> [#uses=1]
br i1 %34, label %bb8, label %bb9
bb9: ; preds = %bb8
; Outer-loop latch: produces the next %s1.02 value and branches back to bb7
; unconditionally (the function has no return).
%35 = fadd float 0.000000e+00, undef ; <float> [#uses=1]
br label %bb7
}