mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-20 12:31:40 +00:00
cc987d98bb
This is a major rewrite of the SelectionDAG switch lowering. The previous code would lower switches as a binary tree, discovering clusters of cases suitable for lowering by jump tables or bit tests as it went along. To increase the likelihood of finding jump tables, the binary tree pivot was selected to maximize case density on both sides of the pivot. By not selecting the pivot in the middle, the binary trees would not always be balanced, leading to performance problems in the generated code. This patch rewrites the lowering to search for clusters of cases suitable for jump tables or bit tests first, and then builds the binary tree around those clusters. This way, the binary tree will always be balanced. This has the added benefit of decoupling the different aspects of the lowering: tree building and finding jump tables or bit tests are now easier to tweak separately. For example, this will enable us to balance the tree based on profile info in the future. The algorithm for finding jump tables is O(n^2), whereas the previous algorithm was O(n log n) for common cases, and quadratic only in the worst case. This doesn't seem to be a major problem in practice, e.g. compiling a file consisting of a 10k-case switch was only 30% slower, and such large switches should be rare in practice. Compiling e.g. gcc.c showed no compile-time difference. If this does turn out to be a problem, we could limit the search space of the algorithm. This commit also disables all optimizations during switch lowering in -O0. Differential Revision: http://reviews.llvm.org/D8649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235101 91177308-0d34-0410-b5e6-96231b3b80d8
197 lines
6.5 KiB
LLVM
197 lines
6.5 KiB
LLVM
; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
|
|
; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM %s
|
|
; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
|
|
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
|
|
|
|
; Run jump table test separately since jump tables aren't generated at -O0.
|
|
; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
|
|
; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
|
|
; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
|
|
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
|
|
|
|
; FIXME: When asm-parse is available, could make this an assembly test.
|
|
|
|
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
|
target triple = "powerpc64-unknown-linux-gnu"
|
|
|
|
; i32 global that is only declared in this module (external linkage).
@ei = external global i32

; Post-increment of @ei: read the current value, write value + 1 back,
; and return the value that was read before the increment.
define signext i32 @test_external() nounwind {
start:
  %old = load i32, i32* @ei, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* @ei, align 4
  ret i32 %old
}
|
|
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
|
; accessing external variable ei.
|
|
;
|
|
; MEDIUM: Relocations [
|
|
; MEDIUM: Section {{.*}} .rela.text {
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
|
|
;
|
|
; LARGE: Relocations [
|
|
; LARGE: Section {{.*}} .rela.text {
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
|
|
|
|
; Zero-initialised i32 with internal linkage.
@test_fn_static.si = internal global i32 0, align 4

; Post-increment of @test_fn_static.si: read it, store value + 1 back,
; and return the value observed before the store.
define signext i32 @test_fn_static() nounwind {
start:
  %old = load i32, i32* @test_fn_static.si, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* @test_fn_static.si, align 4
  ret i32 %old
}
|
|
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
|
|
; accessing function-scoped variable si.
|
|
;
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
|
|
;
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
|
; accessing function-scoped variable si.
|
|
;
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM2]]
|
|
|
|
; i32 global defined in this module with initial value 5.
@gi = global i32 5, align 4

; Post-increment of @gi: read it, store value + 1 back, and return the
; value observed before the store.
define signext i32 @test_file_static() nounwind {
start:
  %old = load i32, i32* @gi, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* @gi, align 4
  ret i32 %old
}
|
|
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
|
|
; accessing file-scope variable gi.
|
|
;
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
|
|
;
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
|
; accessing file-scope variable gi.
|
|
;
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM3]]
|
|
|
|
; Returns a fixed double, written as its hexadecimal bit pattern.
; Takes no arguments and touches no memory in the IR.
define double @test_double_const() nounwind {
start:
  ret double 0x3F4FD4920B498CF0
}
|
|
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
|
|
; accessing a constant.
|
|
;
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM4]]
|
|
;
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
|
; accessing a constant.
|
|
;
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
|
|
|
|
; Zero-initialised i32 with common linkage.
@ti = common global i32 0, align 4

; Post-increment of @ti: read it, store value + 1 back, and return the
; value observed before the store.
define signext i32 @test_tentative() nounwind {
start:
  %old = load i32, i32* @ti, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* @ti, align 4
  ret i32 %old
}
|
|
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
|
; accessing tentatively declared variable ti.
|
|
;
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
|
|
;
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
|
|
|
|
; Returns the address of @foo as an i8*.
; The address is first written to a stack slot and read back before the
; cast, so the function pointer takes a round trip through memory.
define i8* @test_fnaddr() nounwind {
start:
  %slot = alloca i32 (i32)*, align 8
  store i32 (i32)* @foo, i32 (i32)** %slot, align 8
  %callee = load i32 (i32)*, i32 (i32)** %slot, align 8
  %raw = bitcast i32 (i32)* %callee to i8*
  ret i8* %raw
}

; Declared only; no body is provided in this module.
declare signext i32 @foo(i32 signext)
|
|
|
|
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
|
; accessing function address foo.
|
|
;
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
|
|
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
|
|
;
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
|
|
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
|
|
|
|
|
|
; Four-way switch over the consecutive case values 3..6 with fall-through
; between the case bodies. The argument is spilled to a stack slot and every
; case body updates that slot:
;   3 -> multiply by 7, then continue into case 4
;   4 -> subtract 1,    then continue into case 5
;   5 -> add 3,         then continue into case 6
;   6 -> shift left by 1
; Any other value goes straight to the epilog. The final slot value is
; returned.
define signext i32 @test_jump_table(i32 signext %i) nounwind {
entry:
  %i.addr = alloca i32, align 4
  store i32 %i, i32* %i.addr, align 4
  %0 = load i32, i32* %i.addr, align 4
  switch i32 %0, label %sw.default [
    i32 3, label %sw.bb
    i32 4, label %sw.bb1
    i32 5, label %sw.bb2
    i32 6, label %sw.bb3
  ]

sw.default:                                       ; preds = %entry
  br label %sw.epilog

sw.bb:                                            ; preds = %entry
  %1 = load i32, i32* %i.addr, align 4
  %mul = mul nsw i32 %1, 7
  store i32 %mul, i32* %i.addr, align 4
  br label %sw.bb1

sw.bb1:                                           ; preds = %entry, %sw.bb
  %2 = load i32, i32* %i.addr, align 4
  %dec = add nsw i32 %2, -1
  store i32 %dec, i32* %i.addr, align 4
  br label %sw.bb2

sw.bb2:                                           ; preds = %entry, %sw.bb1
  %3 = load i32, i32* %i.addr, align 4
  %add = add nsw i32 %3, 3
  store i32 %add, i32* %i.addr, align 4
  br label %sw.bb3

sw.bb3:                                           ; preds = %entry, %sw.bb2
  %4 = load i32, i32* %i.addr, align 4
  %shl = shl i32 %4, 1
  store i32 %shl, i32* %i.addr, align 4
  br label %sw.epilog

sw.epilog:                                        ; preds = %sw.bb3, %sw.default
  %5 = load i32, i32* %i.addr, align 4
  ret i32 %5
}

; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing a jump table address.
;
; The section header is matched with a wildcard, as the non-jump-table
; checks in this file do, so the test does not depend on the numeric index
; that llvm-readobj prints for .rela.text.
;
; MEDIUM-JT: Relocations [
; MEDIUM-JT: Section {{.*}} .rela.text {
; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
;
; LARGE-JT: Relocations [
; LARGE-JT: Section {{.*}} .rela.text {
; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
|