llvm-6502/test/CodeGen/ARM/fast-isel-call.ll
Juergen Ributzka f08cddcf56 Reapply [FastISel] Let the target decide first if it wants to materialize a constant (215588).
Note: This was originally reverted to track down a buildbot error. This commit
exposed a latent bug that was fixed in r215753. Therefore it is reapplied
without any modifications.

I ran it through SPEC2k and SPEC2k6 for AArch64 and it didn't introduce any new
regressions.

Original commit message:
This changes the order in which FastISel tries to materialize a constant.
Originally it would try to use a simple target-independent approach, which
can lead to the generation of inefficient code.

On X86 this would result in the use of movabsq to materialize any 64bit
integer constant - even for simple and small values such as 0 and 1. Also
some very funny floating-point materialization could be observed too.

On AArch64 it would materialize the constant 0 in a register even though the
architecture has an actual "zero" register.

On ARM it would generate unnecessary mov instructions or not use mvn.

This change simply changes the order and always asks the target first if it
likes to materialize the constant. This doesn't fix all the issues
mentioned above, but it enables the targets to implement such
optimizations.

Related to <rdar://problem/17420988>.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216006 91177308-0d34-0410-b5e6-96231b3b80d8
2014-08-19 19:05:24 +00:00

263 lines
7.8 KiB
LLVM

; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
; Note that some of these tests assume that relocations are either
; movw/movt or constant pool loads. Different platforms will select
; different approaches.
; Helper callee: takes a zeroext i1 argument and widens it to i32 with zext.
define i32 @t0(i1 zeroext %a) nounwind {
  %widened = zext i1 %a to i32
  ret i32 %widened
}
; Helper callee: takes a signext i8 argument and widens it to i32 with sext.
define i32 @t1(i8 signext %a) nounwind {
  %widened = sext i8 %a to i32
  ret i32 %widened
}
; Helper callee: takes a zeroext i8 argument and widens it to i32 with zext.
define i32 @t2(i8 zeroext %a) nounwind {
  %widened = zext i8 %a to i32
  ret i32 %widened
}
; Helper callee: takes a signext i16 argument and widens it to i32 with sext.
define i32 @t3(i16 signext %a) nounwind {
  %widened = sext i16 %a to i32
  ret i32 %widened
}
; Helper callee: takes a zeroext i16 argument and widens it to i32 with zext.
define i32 @t4(i16 zeroext %a) nounwind {
  %widened = zext i16 %a to i32
  ret i32 %widened
}
; Calls the t0-t4 helpers with both constant and incoming-argument operands.
; The checks interleaved below pin the extension instruction expected in front
; of each signext/zeroext argument, and the movw expected when a constant
; argument has to be materialized.
define void @foo(i8 %a, i16 %b) nounwind {
; ARM: foo
; THUMB: foo
;; Materialize i1 1
; ARM: movw r2, #1
;; zero-ext
; ARM: and r2, r2, #1
; THUMB: and r2, r2, #1
%1 = call i32 @t0(i1 zeroext 1)
;; signext i8 argument: expect sxtb, then a copy into r0
; ARM: sxtb r2, r1
; ARM: mov r0, r2
; THUMB: sxtb r2, r1
; THUMB: mov r0, r2
%2 = call i32 @t1(i8 signext %a)
;; zeroext i8 argument: expect a mask with #255, then a copy into r0
; ARM: and r2, r1, #255
; ARM: mov r0, r2
; THUMB: and r2, r1, #255
; THUMB: mov r0, r2
%3 = call i32 @t2(i8 zeroext %a)
;; signext i16 argument: expect sxth, then a copy into r0
; ARM: sxth r2, r1
; ARM: mov r0, r2
; THUMB: sxth r2, r1
; THUMB: mov r0, r2
%4 = call i32 @t3(i16 signext %b)
;; zeroext i16 argument: expect uxth, then a copy into r0
; ARM: uxth r2, r1
; ARM: mov r0, r2
; THUMB: uxth r2, r1
; THUMB: mov r0, r2
%5 = call i32 @t4(i16 zeroext %b)
;; A few tests to check materialization
;; Note: i1 1 was materialized with the t0 call above
; ARM: movw r1, #255
%6 = call i32 @t2(i8 zeroext 255)
; ARM: movw r1, #65535
; THUMB: movw r1, #65535
%7 = call i32 @t4(i16 zeroext 65535)
ret void
}
; Calls each of the @t5-@t9 declarations, whose return values carry
; signext/zeroext attributes. Every result is discarded; the function carries
; no check lines of its own.
define void @foo2() nounwind {
  %s16 = call signext i16 @t5()
  %z16 = call zeroext i16 @t6()
  %s8 = call signext i8 @t7()
  %z8 = call zeroext i8 @t8()
  %z1 = call zeroext i1 @t9()
  ret void
}
; External callees used by @foo2. Each takes no arguments and returns a narrow
; integer with an explicit sign- or zero-extension return attribute.
declare signext i16 @t5()
declare zeroext i16 @t6()
declare signext i8 @t7()
declare zeroext i8 @t8()
declare zeroext i1 @t9()
; Passes six zeroext i8 constants to @bar. The negative constants -8, -69 and
; -70 show up in the checks as their unsigned 8-bit values 248, 187 and 186.
; The checks expect every constant to be materialized and masked with #255,
; and the last two to be stored at [sp] and [sp, #4] before the call.
; The long-call check groups instead expect the address of bar to be loaded
; into a register and the call to be made with blx through that register.
define i32 @t10() {
entry:
; ARM: @t10
; ARM: movw [[R0:l?r[0-9]*]], #0
; ARM: movw [[R1:l?r[0-9]*]], #248
; ARM: movw [[R2:l?r[0-9]*]], #187
; ARM: movw [[R3:l?r[0-9]*]], #28
; ARM: movw [[R4:l?r[0-9]*]], #40
; ARM: movw [[R5:l?r[0-9]*]], #186
; ARM: and [[R0]], [[R0]], #255
; ARM: and [[R1]], [[R1]], #255
; ARM: and [[R2]], [[R2]], #255
; ARM: and [[R3]], [[R3]], #255
; ARM: and [[R4]], [[R4]], #255
; ARM: str [[R4]], [sp]
; ARM: and [[R4]], [[R5]], #255
; ARM: str [[R4]], [sp, #4]
; ARM: bl {{_?}}bar
; ARM-LONG: @t10
; ARM-LONG: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
; ARM-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
; ARM-LONG: ldr [[R]], {{\[}}[[R]]{{\]}}
; ARM-LONG: blx [[R]]
; THUMB: @t10
; THUMB: movs [[R0:l?r[0-9]*]], #0
; THUMB: movs [[R1:l?r[0-9]*]], #248
; THUMB: movs [[R2:l?r[0-9]*]], #187
; THUMB: movs [[R3:l?r[0-9]*]], #28
; THUMB: movw [[R4:l?r[0-9]*]], #40
; THUMB: movw [[R5:l?r[0-9]*]], #186
; THUMB: and [[R0]], [[R0]], #255
; THUMB: and [[R1]], [[R1]], #255
; THUMB: and [[R2]], [[R2]], #255
; THUMB: and [[R3]], [[R3]], #255
; THUMB: and [[R4]], [[R4]], #255
; THUMB: str.w [[R4]], [sp]
; THUMB: and [[R4]], [[R5]], #255
; THUMB: str.w [[R4]], [sp, #4]
; THUMB: bl {{_?}}bar
; THUMB-LONG: @t10
; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}}
; THUMB-LONG: blx [[R]]
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
ret i32 0
}
; External callee for @t10: six zeroext i8 parameters, i32 result.
declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
; Trivial target for the indirect call in @foo3: ignores %i and returns 0.
define i32 @bar0(i32 %i) nounwind {
  ret i32 0
}
; Indirect call test: stores the address of @bar0 into a stack slot, reloads
; it, and calls through the loaded pointer with the argument 0.
; The checks expect r0 to be zeroed, the address of bar0 to be formed in r1
; (either a movw/movt pair or a constant-pool load), and a blx through r1.
define void @foo3() uwtable {
; ARM: movw r0, #0
; ARM: {{(movw r1, :lower16:_?bar0)|(ldr r1, .LCPI)}}
; ARM: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}}
; ARM: blx r1
; THUMB: movs r0, #0
; THUMB: {{(movw r1, :lower16:_?bar0)|(ldr.n r1, .LCPI)}}
; THUMB: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}}
; THUMB: blx r1
%fptr = alloca i32 (i32)*, align 8
store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
%1 = load i32 (i32)** %fptr, align 8
%call = call i32 %1(i32 0)
ret void
}
; Library-call test: the i32 udiv below is expected to become a call to a
; runtime division routine (the checks accept ___udivsi3 or __aeabi_uidiv).
; The default check groups expect a direct bl; the long-call check groups
; expect the routine's address to be loaded into r2 and called with blx r2.
define i32 @LibCall(i32 %a, i32 %b) {
entry:
; ARM: LibCall
; ARM: bl {{___udivsi3|__aeabi_uidiv}}
; ARM-LONG: LibCall
; ARM-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr r2, .LCPI)}}
; ARM-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
; ARM-LONG: ldr r2, [r2]
; ARM-LONG: blx r2
; THUMB: LibCall
; THUMB: bl {{___udivsi3|__aeabi_uidiv}}
; THUMB-LONG: LibCall
; THUMB-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr.n r2, .LCPI)}}
; THUMB-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
; THUMB-LONG: ldr r2, [r2]
; THUMB-LONG: blx r2
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
; Test fastcc
; fastcc callee that forwards its float argument to @print (default calling
; convention). The VFP-enabled checks expect a vmov r0, s0 in this function;
; the -vfp2 check groups expect no mention of s0 at all.
define fastcc void @fast_callee(float %i) ssp {
entry:
; ARM: fast_callee
; ARM: vmov r0, s0
; THUMB: fast_callee
; THUMB: vmov r0, s0
; ARM-NOVFP: fast_callee
; ARM-NOVFP-NOT: s0
; THUMB-NOVFP: fast_callee
; THUMB-NOVFP-NOT: s0
call void @print(float %i)
ret void
}
; Calls @fast_callee with the fastcc convention and a float constant.
; The VFP-enabled checks expect the constant to be loaded into s0 with vldr.
; The -vfp2 check groups expect it to be built in r0 with movw #13107 (0x3333)
; and movt #16611 (0x40E3), i.e. the 32-bit pattern 0x40E33333.
define void @fast_caller() ssp {
entry:
; ARM: fast_caller
; ARM: vldr s0,
; THUMB: fast_caller
; THUMB: vldr s0,
; ARM-NOVFP: fast_caller
; ARM-NOVFP: movw r0, #13107
; ARM-NOVFP: movt r0, #16611
; THUMB-NOVFP: fast_caller
; THUMB-NOVFP: movw r0, #13107
; THUMB-NOVFP: movt r0, #16611
call fastcc void @fast_callee(float 0x401C666660000000)
ret void
}
; Same shape as @fast_callee but without fastcc: forwards its float argument
; to @print. The VFP-enabled checks expect a vmov s0, r0 in this function;
; the -vfp2 check groups expect no mention of s0 at all.
define void @no_fast_callee(float %i) ssp {
entry:
; ARM: no_fast_callee
; ARM: vmov s0, r0
; THUMB: no_fast_callee
; THUMB: vmov s0, r0
; ARM-NOVFP: no_fast_callee
; ARM-NOVFP-NOT: s0
; THUMB-NOVFP: no_fast_callee
; THUMB-NOVFP-NOT: s0
call void @print(float %i)
ret void
}
; Calls @no_fast_callee (default calling convention) with the same float
; constant used by @fast_caller. The VFP-enabled checks expect a vmov r0, s0
; before the call. The -vfp2 check groups expect the constant to be built in
; r0 with movw #13107 and movt #16611 (32-bit pattern 0x40E33333).
define void @no_fast_caller() ssp {
entry:
; ARM: no_fast_caller
; ARM: vmov r0, s0
; THUMB: no_fast_caller
; THUMB: vmov r0, s0
; ARM-NOVFP: no_fast_caller
; ARM-NOVFP: movw r0, #13107
; ARM-NOVFP: movt r0, #16611
; THUMB-NOVFP: no_fast_caller
; THUMB-NOVFP: movw r0, #13107
; THUMB-NOVFP: movt r0, #16611
call void @no_fast_callee(float 0x401C666660000000)
ret void
}
; External callee for @call_undef_args: six i32 parameters, no result.
declare void @bar2(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6)
; Calls @bar2 with an undef fifth argument. The checks expect the first four
; constants in r0-r3, no store to [sp] (the slot that would hold the undef
; argument), and the sixth constant (6) materialized and stored at [sp, #4].
define void @call_undef_args() {
; ARM-LABEL: call_undef_args
; ARM: movw r0, #1
; ARM-NEXT: movw r1, #2
; ARM-NEXT: movw r2, #3
; ARM-NEXT: movw r3, #4
; ARM-NOT: str {{r[0-9]+}}, [sp]
; ARM: movw [[REG:l?r[0-9]*]], #6
; ARM-NEXT: str [[REG]], [sp, #4]
call void @bar2(i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6)
ret void
}
; External callee taking a single float; called by @fast_callee and
; @no_fast_callee.
declare void @print(float)