623a7e146b
instruction lower optimization" in the pre-RA scheduler. The optimization, or rather the hack, was done before the MI use-list was available. Now we should be able to implement it in a better way, perhaps in the two-address pass, until an MI scheduler is available.

Now that the scheduler has to backtrack to handle call sequences, adding artificial scheduling constraints is simply not safe. Furthermore, the hack does not take the other scheduling decisions into consideration, so it is just as likely to pessimize code.

So I view disabling this optimization as a good thing regardless of PR11314.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144267 91177308-0d34-0410-b5e6-96231b3b80d8
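For context, "two-address" here refers to destructive x86-style instructions whose destination is also a source. A hypothetical illustration, not taken from the commit:

    orps %xmm1, %xmm0    ; xmm0 = xmm0 | xmm1, clobbering the old xmm0

If the old value of %xmm0 is still live after such an instruction, register allocation has to insert a copy to preserve it. The hack added artificial edges in the pre-RA scheduler to try to place the last use of the tied operand before the two-address instruction, so the register could be reused without a copy.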
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s

; Full strength reduction wouldn't reduce register pressure, so LSR should
; stick with indexing here.

; FIXME: This is worse off since the scheduler's 2-address hack was disabled.
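; The CHECK lines below pin the loads and stores to the indexed addressing
; mode (%base,%rax,4), i.e. a single induction variable in %rax rather than
; separately strength-reduced pointers for %x and %y.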
; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
; CHECK: leaq 4(%rax), %{{rcx|r9}}
; CHECK: cvtdq2ps
; CHECK: orps {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]]
; CHECK: movaps [[X4]], (%{{rdi|rcx}},%rax,4)
; CHECK: cmpl %{{ecx|r9d}}, (%{{rdx|r8}})
; CHECK: jg
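
; The function below is a branch-free vector floorf. It rounds each lane
; using the 2^23 trick: adding and then subtracting the float 8388608.0
; (bit pattern 0x4B000000 = 1258291200) forces rounding to an integer for
; lanes with |x| < 2^23, while lanes already at or above 2^23 (cmpps
; predicate 5, not-less-than) get a zero magic constant and pass through
; unchanged. Where rounding went up (cmpps predicate 1, less-than),
; cvtdq2ps turns the all-ones mask into -1.0, which is added to correct
; round-to-nearest down to floor. The original sign bits are OR'd back in
; at the end so that -0.0 stays -0.0.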

define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
entry:
  %0 = load i32* %n, align 4
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb, label %return

bb:
  %indvar = phi i64 [ %indvar.next, %bb ], [ 0, %entry ]
  %tmp = shl i64 %indvar, 2
  %scevgep = getelementptr float* %y, i64 %tmp
  %scevgep9 = bitcast float* %scevgep to <4 x float>*
  %scevgep10 = getelementptr float* %x, i64 %tmp
  %scevgep1011 = bitcast float* %scevgep10 to <4 x float>*
  %2 = load <4 x float>* %scevgep1011, align 16
  %3 = bitcast <4 x float> %2 to <4 x i32>
  %4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %5 = bitcast <4 x i32> %4 to <4 x float>
  %6 = and <4 x i32> %3, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  %7 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %5, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) nounwind
  %tmp.i4 = bitcast <4 x float> %7 to <4 x i32>
  %8 = xor <4 x i32> %tmp.i4, <i32 -1, i32 -1, i32 -1, i32 -1>
  %9 = and <4 x i32> %8, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200>
  %10 = or <4 x i32> %9, %6
  %11 = bitcast <4 x i32> %10 to <4 x float>
  %12 = fadd <4 x float> %2, %11
  %13 = fsub <4 x float> %12, %11
  %14 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %2, <4 x float> %13, i8 1) nounwind
  %15 = bitcast <4 x float> %14 to <4 x i32>
  %16 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %15) nounwind readnone
  %17 = fadd <4 x float> %13, %16
  %tmp.i = bitcast <4 x float> %17 to <4 x i32>
  %18 = or <4 x i32> %tmp.i, %6
  %19 = bitcast <4 x i32> %18 to <4 x float>
  store <4 x float> %19, <4 x float>* %scevgep9, align 16
  %tmp12 = add i64 %tmp, 4
  %tmp13 = trunc i64 %tmp12 to i32
  %20 = load i32* %n, align 4
  %21 = icmp sgt i32 %20, %tmp13
  %indvar.next = add i64 %indvar, 1
  br i1 %21, label %bb, label %return

return:
  ret void
}

declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone