mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-21 19:32:16 +00:00
f567a6d39b
FastISel was only enabled for iOS ARM and Thumb2, this patch enables it for ARM (not Thumb2) on Linux and NaCl. Thumb2 support needs a bit more work, mainly around register class restrictions. The patch punts to SelectionDAG when doing TLS relocation on non-Darwin targets. I will fix this and other FastISel-to-SelectionDAG failures in a separate patch. The patch also forces FastISel to retain frame pointers: iOS always keeps them for backtracking (so emitted code won't change because of this), but Linux was getting much worse code that was incorrect when using big frames (such as test-suite's lencod). I'll also fix this in a later patch, it will probably require a peephole so that FastISel doesn't rematerialize frame pointers back-to-back. The test changes are straightforward, similar to: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130513/174279.html They also add a vararg test that got dropped in that change. I ran all of test-suite on A15 hardware with --optimize-option=-O0 and all the tests pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182877 91177308-0d34-0410-b5e6-96231b3b80d8
60 lines
2.3 KiB
LLVM
60 lines
2.3 KiB
LLVM
; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s
|
|
; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-linux-gnueabi < %s
|
|
|
|
; @main - entry point of the test input. It reserves three stack slots, reads
; the <4 x i32> slot without ever writing to it, and hands that value to @__aa
; together with a null i8*, the constant 3 and the address of the <4 x float>
; slot. It then returns the i32 it stored in %retval (0).
; NOTE(review): the lone `|` lines throughout this function are not LLVM IR
; syntax; presumably they are residue from the page this file was captured
; from - confirm and strip before feeding the file to llc.
define i32 @main() nounwind ssp {
|
|
entry:
|
|
%retval = alloca i32, align 4
|
|
%X = alloca <4 x i32>, align 16
|
|
%Y = alloca <4 x float>, align 16
|
|
store i32 0, i32* %retval
|
|
; %X has no preceding store in this function, so the loaded vector is whatever
; the fresh stack slot holds.
%tmp = load <4 x i32>* %X, align 16
|
|
; Arguments: vector by value, null pointer, offset 3, pointer to %Y.
call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
|
|
; Reload the return value stored above; %retval is not passed to the callee.
%0 = load i32* %retval
|
|
ret i32 %0
|
|
}
|
|
|
|
; @__aa - internal helper called from @main. It spills all four arguments to
; stack slots, copies the <4 x i32> argument through a second slot, converts it
; lane-wise to <4 x float> with sitofp, and forwards the converted vector plus
; the reloaded %p, %offset and %constants values to @__bb.
;   %v         - <4 x i32> vector, passed by value
;   %p         - i8* forwarded unchanged to @__bb
;   %offset    - i32 forwarded unchanged to @__bb
;   %constants - <4 x float>* forwarded unchanged to @__bb
define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
|
|
entry:
|
|
%__a.addr.i = alloca <4 x i32>, align 16
|
|
%v.addr = alloca <4 x i32>, align 16
|
|
%p.addr = alloca i8*, align 4
|
|
%offset.addr = alloca i32, align 4
|
|
%constants.addr = alloca <4 x float>*, align 4
|
|
; Spill every incoming argument to its own stack slot.
store <4 x i32> %v, <4 x i32>* %v.addr, align 16
|
|
store i8* %p, i8** %p.addr, align 4
|
|
store i32 %offset, i32* %offset.addr, align 4
|
|
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
|
|
; Copy the vector from %v.addr into %__a.addr.i and read it back again.
%tmp = load <4 x i32>* %v.addr, align 16
|
|
store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
|
|
%tmp.i = load <4 x i32>* %__a.addr.i, align 16
|
|
; Round trip <4 x i32> -> <16 x i8> -> <4 x i32>; the bits are unchanged.
%0 = bitcast <4 x i32> %tmp.i to <16 x i8>
|
|
%1 = bitcast <16 x i8> %0 to <4 x i32>
|
|
; Signed integer to floating-point conversion on each of the four lanes.
%vcvt.i = sitofp <4 x i32> %1 to <4 x float>
|
|
; Reload the three scalar/pointer arguments from their spill slots.
%tmp1 = load i8** %p.addr, align 4
|
|
%tmp2 = load i32* %offset.addr, align 4
|
|
%tmp3 = load <4 x float>** %constants.addr, align 4
|
|
call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
|
|
ret void
|
|
}
|
|
|
|
; @__bb - internal helper called from @__aa. It spills all four arguments to
; stack slots, then stores an i64 read from the local %data slot to the address
; %p + %offset (byte offset), reinterpreted as an i64*.
;   %v         - <4 x float> vector; spilled to %v.addr but not read back here
;   %p         - i8* base pointer for the store address
;   %offset    - i32 byte offset added to %p
;   %constants - <4 x float>*; spilled to %constants.addr but not read back here
define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
|
|
entry:
|
|
%v.addr = alloca <4 x float>, align 16
|
|
%p.addr = alloca i8*, align 4
|
|
%offset.addr = alloca i32, align 4
|
|
%constants.addr = alloca <4 x float>*, align 4
|
|
; 8-byte slot that is only given 4-byte alignment.
%data = alloca i64, align 4
|
|
; Spill every incoming argument to its own stack slot.
store <4 x float> %v, <4 x float>* %v.addr, align 16
|
|
store i8* %p, i8** %p.addr, align 4
|
|
store i32 %offset, i32* %offset.addr, align 4
|
|
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
|
|
; %data has no preceding store in this function, so the loaded i64 is whatever
; the fresh stack slot holds.
%tmp = load i64* %data, align 4
|
|
%tmp1 = load i8** %p.addr, align 4
|
|
%tmp2 = load i32* %offset.addr, align 4
|
|
; Byte-wise pointer arithmetic: %add.ptr = %tmp1 + %tmp2.
%add.ptr = getelementptr i8* %tmp1, i32 %tmp2
|
|
%0 = bitcast i8* %add.ptr to i64*
|
|
; Index 0 leaves the address unchanged.
%arrayidx = getelementptr inbounds i64* %0, i32 0
|
|
; i64 store with no explicit alignment, to an address derived from an
; arbitrary byte offset.
store i64 %tmp, i64* %arrayidx
|
|
ret void
|
|
}
|