mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 20:29:48 +00:00
395f4f4b2a
This is a re-commit of r235101, which also fixes the problems with the previous patch: - Switches with only a default case and non-fallthrough were handled incorrectly - The previous patch tickled a bug in PowerPC Early-Return Creation which is fixed here. > This is a major rewrite of the SelectionDAG switch lowering. The previous code > would lower switches as a binary tree, discovering clusters of cases > suitable for lowering by jump tables or bit tests as it went along. To increase > the likelihood of finding jump tables, the binary tree pivot was selected to > maximize case density on both sides of the pivot. > > By not selecting the pivot in the middle, the binary trees would not always > be balanced, leading to performance problems in the generated code. > > This patch rewrites the lowering to search for clusters of cases > suitable for jump tables or bit tests first, and then builds the binary > tree around those clusters. This way, the binary tree will always be balanced. > > This has the added benefit of decoupling the different aspects of the lowering: > tree building and jump table or bit tests finding are now easier to tweak > separately. > > For example, this will enable us to balance the tree based on profile info > in the future. > > The algorithm for finding jump tables is quadratic, whereas the previous algorithm > was O(n log n) for common cases, and quadratic only in the worst case. This > doesn't seem to be a major problem in practice, e.g. compiling a file consisting > of a 10k-case switch was only 30% slower, and such large switches should be rare > in practice. Compiling e.g. gcc.c showed no compile-time difference. If this > does turn out to be a problem, we could limit the search space of the algorithm. > > This commit also disables all optimizations during switch lowering in -O0. > > Differential Revision: http://reviews.llvm.org/D8649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235560 91177308-0d34-0410-b5e6-96231b3b80d8
36 lines
1.0 KiB
LLVM
36 lines
1.0 KiB
LLVM
; RUN: llc < %s -print-machineinstrs=expand-isel-pseudos -o /dev/null 2>&1 | FileCheck %s
|
|
|
|
; ARM & AArch64 run an extra SimplifyCFG which disrupts this test.
|
|
; XFAIL: arm,aarch64
|
|
|
|
; Make sure we have the correct weight attached to each successor.
|
|
; test2: returns an i32 selected by a switch over the sign-extended argument %x.
; The switch carries the !prof !0 branch-weight metadata; the FileCheck
; directives embedded below pin the per-successor weights that show up in the
; printed machine code for this function.
define i32 @test2(i32 %x) nounwind uwtable readnone ssp {
|
|
; CHECK: Machine code for function test2:
|
|
entry:
|
|
; Widen the i32 argument to i64 so the switch operates on a 64-bit value.
%conv = sext i32 %x to i64
|
|
; Cases 0, 1 and 4 share %sw.bb, case 5 goes to %sw.bb1, and every other
; value takes the default edge straight to %return.
switch i64 %conv, label %return [
|
|
i64 0, label %sw.bb
|
|
i64 1, label %sw.bb
|
|
i64 4, label %sw.bb
|
|
i64 5, label %sw.bb1
|
|
], !prof !0
|
|
; Expected output: three machine basic blocks derived from %entry, each
; followed by its successor list with the weight of every edge in parentheses.
; NOTE(review): the BB#N numbering is whatever the lowering assigns; the mapping
; of BB#1/BB#2/BB#3 onto %sw.bb/%sw.bb1/%return is not shown here - confirm
; against actual llc output before editing the expected weights.
; CHECK: BB#0: derived from LLVM BB %entry
|
|
; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
|
|
; CHECK: BB#4: derived from LLVM BB %entry
|
|
; CHECK: Successors according to CFG: BB#1(4) BB#5(10)
|
|
; CHECK: BB#5: derived from LLVM BB %entry
|
|
; CHECK: Successors according to CFG: BB#1(10) BB#3(7)
|
|
|
|
sw.bb:
|
|
br label %return
|
|
|
|
sw.bb1:
|
|
br label %return
|
|
|
|
return:
|
|
; Result is 5 when arriving from %sw.bb1, 1 from %sw.bb, and 0 when the
; switch's default edge from %entry was taken.
%retval.0 = phi i32 [ 5, %sw.bb1 ], [ 1, %sw.bb ], [ 0, %entry ]
|
|
ret i32 %retval.0
|
|
}
|
|
|
|
!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
|