mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-06 23:32:27 +00:00
This is done by pushing physical register definitions close to their use, which happens to handle flag definitions if they're not glued to the branch. This seems to be generally a good thing though, so I didn't need to add a target hook yet. The primary motivation is to generate code closer to what people expect and rule out missed opportunity from enabling macro-op fusion. As a side benefit, we get several 2-5% gains on x86 benchmarks. There is one regression: SingleSource/Benchmarks/Shootout/lists slows down be -10%. But this is an independent scheduler bug that will be tracked separately. See rdar://problem/9283108. Incidentally, pre-RA scheduling is only half the solution. Fixing the later passes is tracked by: <rdar://problem/8932804> [pre-RA-sched] on x86, attempt to schedule CMP/TEST adjacent with condition jump Fixes: <rdar://problem/9262453> Scheduler unnecessary break of cmp/jump fusion git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129508 91177308-0d34-0410-b5e6-96231b3b80d8
42 lines
1.7 KiB
LLVM
42 lines
1.7 KiB
LLVM
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
|
|
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
|
|
; PR2659
|
|
|
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
|
target triple = "i686-apple-darwin9.4.0"
|
|
|
|
define i32 @binomial(i32 %n, i32 %k) nounwind {
|
|
entry:
|
|
%cmp = icmp ugt i32 %k, %n ; <i1> [#uses=1]
|
|
br i1 %cmp, label %ifthen, label %forcond.preheader
|
|
|
|
forcond.preheader: ; preds = %entry
|
|
%cmp44 = icmp eq i32 %k, 0 ; <i1> [#uses=1]
|
|
br i1 %cmp44, label %afterfor, label %forbody
|
|
|
|
; CHECK: %forcond.preheader
|
|
; CHECK: movl $1
|
|
; CHECK-NOT: xorl
|
|
; CHECK-NOT: movl
|
|
; CHECK-NOT: LBB
|
|
; CHECK: je
|
|
|
|
ifthen: ; preds = %entry
|
|
ret i32 0
|
|
|
|
forbody: ; preds = %forbody, %forcond.preheader
|
|
%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3]
|
|
%accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1]
|
|
%divisor.02 = add i32 %indvar, 1 ; <i32> [#uses=2]
|
|
%n.addr.03 = sub i32 %n, %indvar ; <i32> [#uses=1]
|
|
%mul = mul i32 %n.addr.03, %accumulator.01 ; <i32> [#uses=1]
|
|
%div = udiv i32 %mul, %divisor.02 ; <i32> [#uses=2]
|
|
%inc = add i32 %indvar, 2 ; <i32> [#uses=1]
|
|
%cmp4 = icmp ugt i32 %inc, %k ; <i1> [#uses=1]
|
|
br i1 %cmp4, label %afterfor, label %forbody
|
|
|
|
afterfor: ; preds = %forbody, %forcond.preheader
|
|
%accumulator.0.lcssa = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1]
|
|
ret i32 %accumulator.0.lcssa
|
|
}
|