mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-27 13:30:05 +00:00
8bf75ed41c
Tail merging had been disabled on PPC because it would disturb bundling decisions made during pre-RA scheduling on the 970 cores. Now, however, all bundling decisions are made during post-RA scheduling, and tail merging is generally beneficial (the average test-suite speedup is insignificantly positive). Largest test-suite speedups: MultiSource/Benchmarks/mediabench/gsm/toast/toast - 30% MultiSource/Benchmarks/BitBench/uuencode/uuencode - 23% SingleSource/Benchmarks/Shootout-C++/ary - 21% SingleSource/Benchmarks/Stanford/Queens - 17% Largest slowdowns: MultiSource/Benchmarks/MiBench/security-sha/security-sha - 24% MultiSource/Benchmarks/McCat/03-testtrie/testtrie - 22% MultiSource/Applications/JM/ldecod/ldecod - 14% MultiSource/Benchmarks/mediabench/g721/g721encode/encode - 9% This is improved by using full (instead of just critical) anti-dependency breaking, but doing so still causes miscompiles and so cannot yet be enabled by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158259 91177308-0d34-0410-b5e6-96231b3b80d8
68 lines
2.4 KiB
LLVM
68 lines
2.4 KiB
LLVM
; RUN: llc < %s -march=ppc32 -enable-tail-merge=0 | grep bl.*baz | count 2
|
|
; RUN: llc < %s -march=ppc32 -enable-tail-merge=0 | grep bl.*quux | count 2
|
|
; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 1
|
|
; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 1
|
|
; Check that tail merging is the default on ppc, and that -enable-tail-merge works.
|
|
|
|
; ModuleID = 'tail.c'
|
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
|
|
target triple = "i686-apple-darwin8"
|
|
|
|
define i32 @f(i32 %i, i32 %q) {
|
|
entry:
|
|
%i_addr = alloca i32 ; <i32*> [#uses=2]
|
|
%q_addr = alloca i32 ; <i32*> [#uses=2]
|
|
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
|
|
store i32 %i, i32* %i_addr
|
|
store i32 %q, i32* %q_addr
|
|
%tmp = load i32* %i_addr ; <i32> [#uses=1]
|
|
%tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
|
|
%tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
|
|
%toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
|
|
br i1 %toBool, label %cond_true, label %cond_false
|
|
|
|
cond_true: ; preds = %entry
|
|
%tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
|
|
%tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
|
|
%tmp7 = load i32* %q_addr ; <i32> [#uses=1]
|
|
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
|
|
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
|
|
%toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
|
|
br i1 %toBool10, label %cond_true11, label %cond_false15
|
|
|
|
cond_false: ; preds = %entry
|
|
%tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
|
|
%tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
|
|
%tmp27 = load i32* %q_addr ; <i32> [#uses=1]
|
|
%tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
|
|
%tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
|
|
%toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1]
|
|
br i1 %toBool210, label %cond_true11, label %cond_false15
|
|
|
|
cond_true11: ; preds = %cond_next
|
|
%tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
|
|
%tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
|
|
br label %cond_next18
|
|
|
|
cond_false15: ; preds = %cond_next
|
|
%tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
|
|
%tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
|
|
br label %cond_next18
|
|
|
|
cond_next18: ; preds = %cond_false15, %cond_true11
|
|
%tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
|
|
br label %return
|
|
|
|
return: ; preds = %cond_next18
|
|
%retval20 = load i32* %retval ; <i32> [#uses=1]
|
|
ret i32 %retval20
|
|
}
|
|
|
|
declare i32 @bar(...)
|
|
|
|
declare i32 @baz(...)
|
|
|
|
declare i32 @foo(...)
|
|
|
|
declare i32 @quux(...)
|