llvm-6502/test/CodeGen/ARM/fast-isel-pred.ll
JF Bastien fe532ad6d6 Enable FastISel on ARM for Linux and NaCl, not MCJIT
This is a resubmit of r182877, which was reverted because it broken
MCJIT tests on ARM. The patch leaves MCJIT on ARM as it was before: only
enabled for iOS. I've CC'ed people from the original review and revert.

FastISel was only enabled for iOS ARM and Thumb2, this patch enables it
for ARM (not Thumb2) on Linux and NaCl, but not MCJIT.

Thumb2 support needs a bit more work, mainly around register class
restrictions.

The patch punts to SelectionDAG when doing TLS relocation on non-Darwin
targets. I will fix this and other FastISel-to-SelectionDAG failures in
a separate patch.

The patch also forces FastISel to retain frame pointers: iOS always
keeps them for backtracking (so emitted code won't change because of
this), but Linux was getting much worse code that was incorrect when
using big frames (such as test-suite's lencod). I'll also fix this in a
later patch, it will probably require a peephole so that FastISel
doesn't rematerialize frame pointers back-to-back.

The test changes are straightforward, similar to:
  http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130513/174279.html
They also add a vararg test that got dropped in that change.

I ran all of lnt test-suite on A15 hardware with --optimize-option=-O0
and all the tests pass. All the tests also pass on x86 make check-all. I
also re-ran the check-all tests that failed on ARM, and they all seem to
pass.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183966 91177308-0d34-0410-b5e6-96231b3b80d8
2013-06-14 02:49:43 +00:00

60 lines
2.3 KiB
LLVM

; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s
; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-linux-gnueabi < %s
define i32 @main() nounwind ssp {
entry:
%retval = alloca i32, align 4
%X = alloca <4 x i32>, align 16
%Y = alloca <4 x float>, align 16
store i32 0, i32* %retval
%tmp = load <4 x i32>* %X, align 16
call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
%0 = load i32* %retval
ret i32 %0
}
define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
entry:
%__a.addr.i = alloca <4 x i32>, align 16
%v.addr = alloca <4 x i32>, align 16
%p.addr = alloca i8*, align 4
%offset.addr = alloca i32, align 4
%constants.addr = alloca <4 x float>*, align 4
store <4 x i32> %v, <4 x i32>* %v.addr, align 16
store i8* %p, i8** %p.addr, align 4
store i32 %offset, i32* %offset.addr, align 4
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
%tmp = load <4 x i32>* %v.addr, align 16
store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
%tmp.i = load <4 x i32>* %__a.addr.i, align 16
%0 = bitcast <4 x i32> %tmp.i to <16 x i8>
%1 = bitcast <16 x i8> %0 to <4 x i32>
%vcvt.i = sitofp <4 x i32> %1 to <4 x float>
%tmp1 = load i8** %p.addr, align 4
%tmp2 = load i32* %offset.addr, align 4
%tmp3 = load <4 x float>** %constants.addr, align 4
call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
ret void
}
define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
entry:
%v.addr = alloca <4 x float>, align 16
%p.addr = alloca i8*, align 4
%offset.addr = alloca i32, align 4
%constants.addr = alloca <4 x float>*, align 4
%data = alloca i64, align 4
store <4 x float> %v, <4 x float>* %v.addr, align 16
store i8* %p, i8** %p.addr, align 4
store i32 %offset, i32* %offset.addr, align 4
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
%tmp = load i64* %data, align 4
%tmp1 = load i8** %p.addr, align 4
%tmp2 = load i32* %offset.addr, align 4
%add.ptr = getelementptr i8* %tmp1, i32 %tmp2
%0 = bitcast i8* %add.ptr to i64*
%arrayidx = getelementptr inbounds i64* %0, i32 0
store i64 %tmp, i64* %arrayidx
ret void
}