mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-22 07:32:48 +00:00
12f0dc6bb5
This is done by pushing physical register definitions close to their use, which happens to handle flag definitions if they're not glued to the branch. This seems to be generally a good thing though, so I didn't need to add a target hook yet. The primary motivation is to generate code closer to what people expect and to rule out missed opportunities for macro-op fusion. As a side benefit, we get several 2-5% gains on x86 benchmarks. There is one regression: SingleSource/Benchmarks/Shootout/lists slows down by 10%. But this is an independent scheduler bug that will be tracked separately. See rdar://problem/9283108. Incidentally, pre-RA scheduling is only half the solution. Fixing the later passes is tracked by: <rdar://problem/8932804> [pre-RA-sched] on x86, attempt to schedule CMP/TEST adjacent with condition jump Fixes: <rdar://problem/9262453> Scheduler unnecessary break of cmp/jump fusion git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129508 91177308-0d34-0410-b5e6-96231b3b80d8
42 lines
1.7 KiB
LLVM
42 lines
1.7 KiB
LLVM
; Test preamble: llc invocations for 32-bit x86 (i686-apple-darwin9.4.0),
; followed by the module's data layout and target triple.
; NOTE(review): two llc invocation lines are present below. The first one
; (grep movl, count 4) looks like the pre-change side of a diff view and the
; second (piped to FileCheck) like its replacement -- confirm which should
; remain.
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
|
|
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
|
|
; PR2659
|
|
|
|
; Pointers are 32 bits wide (p:32:32:32); little-endian layout ("e").
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
|
target triple = "i686-apple-darwin9.4.0"
|
|
|
|
; i32 @binomial(i32 %n, i32 %k)
;   Returns 0 when %k is greater than %n (unsigned compare).
;   Returns 1 when %k is 0.
;   Otherwise returns the last %div value computed by the %forbody loop.
; The FileCheck directives that follow the %forcond.preheader block constrain
; the instructions llc emits for this function.
define i32 @binomial(i32 %n, i32 %k) nounwind {
|
|
entry:
|
|
; Unsigned test: %k > %n selects the early "return 0" path.
%cmp = icmp ugt i32 %k, %n ; <i1> [#uses=1]
|
|
br i1 %cmp, label %ifthen, label %forcond.preheader
|
|
|
|
forcond.preheader: ; preds = %entry
|
|
; Skip the loop entirely when %k is zero; %afterfor then yields 1.
%cmp44 = icmp eq i32 %k, 0 ; <i1> [#uses=1]
|
|
br i1 %cmp44, label %afterfor, label %forbody
|
|
|
|
; The directives below first look for the text "%forcond.preheader" in the llc
; output, then a "movl $1", then a "je". Between the "movl $1" and the "je"
; they forbid any "xorl", any further "movl", and any "LBB" text.
; NOTE(review): presumably this keeps the flag-setting compare adjacent to the
; conditional jump -- confirm against the change that introduced them.
; CHECK: %forcond.preheader
|
|
; CHECK: movl $1
|
|
; CHECK-NOT: xorl
|
|
; CHECK-NOT: movl
|
|
; CHECK-NOT: LBB
|
|
; CHECK: je
|
|
|
|
ifthen: ; preds = %entry
|
|
ret i32 0
|
|
|
|
forbody: ; preds = %forbody, %forcond.preheader
|
|
; %indvar is 0 on entry from the preheader and %divisor.02 (%indvar + 1) on
; each back edge.
%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3]
|
|
; Running result: 1 on entry, the previous iteration's %div afterwards.
%accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1]
|
|
%divisor.02 = add i32 %indvar, 1 ; <i32> [#uses=2]
|
|
%n.addr.03 = sub i32 %n, %indvar ; <i32> [#uses=1]
|
|
; %div = ((%n - %indvar) * %accumulator.01) udiv (%indvar + 1)
%mul = mul i32 %n.addr.03, %accumulator.01 ; <i32> [#uses=1]
|
|
%div = udiv i32 %mul, %divisor.02 ; <i32> [#uses=2]
|
|
; Leave the loop once %indvar + 2 exceeds %k (unsigned compare).
%inc = add i32 %indvar, 2 ; <i32> [#uses=1]
|
|
%cmp4 = icmp ugt i32 %inc, %k ; <i1> [#uses=1]
|
|
br i1 %cmp4, label %afterfor, label %forbody
|
|
|
|
afterfor: ; preds = %forbody, %forcond.preheader
|
|
; Result is 1 when the loop was skipped, otherwise the final %div.
%accumulator.0.lcssa = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1]
|
|
ret i32 %accumulator.0.lcssa
|
|
}
|