mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-17 21:35:07 +00:00
4a74b3b933
Coalescing can remove copy-like instructions with sub-register operands that constrained the register class. Examples are: x86: GR32_ABCD:sub_8bit_hi -> GR32 arm: DPR_VFP2:ssub0 -> DPR Recompute the register class of any virtual registers that are used by less instructions after coalescing. This affects code generation for the Cortex-A8 where we use NEON instructions for f32 operations, c.f. fp_convert.ll: vadd.f32 d16, d1, d0 vcvt.s32.f32 d0, d16 The register allocator is now free to use d16 for the temporary, and that comes first in the allocation order because it doesn't interfere with any s-registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137133 91177308-0d34-0410-b5e6-96231b3b80d8
51 lines
1.2 KiB
LLVM
51 lines
1.2 KiB
LLVM
; Checks the ARM instructions selected for float <-> i32 conversions.
; The file is compiled four times. The +vfp2, +neon and cortex-a9 runs are all
; checked against the VFP2 prefix; only the cortex-a8 run is checked against
; the NEON prefix.
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2

; test1 - float add followed by a signed float-to-int conversion (fptosi).
; Under the VFP2 prefix the conversion is expected on S registers. Under the
; NEON prefix the add is expected as vadd.f32 writing a D register, and the
; conversion must read that same D register (captured as D0) and write d0.
; S-register operands use a digits-only pattern, in the same style as the
; d[0-9]+ capture, so any register number is accepted and nothing else is.
define i32 @test1(float %a, float %b) {
; VFP2: test1:
; VFP2: vcvt.s32.f32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test1:
; NEON: vadd.f32 [[D0:d[0-9]+]]
; NEON: vcvt.s32.f32 d0, [[D0]]
entry:
  %sum = fadd float %a, %b
  %conv = fptosi float %sum to i32
  ret i32 %conv
}

; test2 - float add followed by an unsigned float-to-int conversion (fptoui).
; Under the VFP2 prefix the conversion is expected on S registers. Under the
; NEON prefix the add is expected as vadd.f32 writing a D register, and the
; conversion must read that same D register (captured as D0) and write d0.
; S-register operands use a digits-only pattern, in the same style as the
; d[0-9]+ capture, so any register number is accepted and nothing else is.
define i32 @test2(float %a, float %b) {
; VFP2: test2:
; VFP2: vcvt.u32.f32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test2:
; NEON: vadd.f32 [[D0:d[0-9]+]]
; NEON: vcvt.u32.f32 d0, [[D0]]
entry:
  %sum = fadd float %a, %b
  %conv = fptoui float %sum to i32
  ret i32 %conv
}

; test3 - integer add followed by an unsigned int-to-float conversion (uitofp).
; Under the VFP2 prefix the conversion is expected on S registers, matched with
; a digits-only pattern so any register number is accepted. Under the NEON
; prefix the conversion is expected with d0 as both destination and source.
define float @test3(i32 %a, i32 %b) {
; VFP2: test3:
; VFP2: vcvt.f32.u32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test3:
; NEON: vcvt.f32.u32 d0, d0
entry:
  %sum = add i32 %a, %b
  %conv = uitofp i32 %sum to float
  ret float %conv
}

; test4 - integer add followed by a signed int-to-float conversion (sitofp).
; Under the VFP2 prefix the conversion is expected on S registers, matched with
; a digits-only pattern so any register number is accepted. Under the NEON
; prefix the conversion is expected with d0 as both destination and source.
define float @test4(i32 %a, i32 %b) {
; VFP2: test4:
; VFP2: vcvt.f32.s32 s{{[0-9]+}}, s{{[0-9]+}}
; NEON: test4:
; NEON: vcvt.f32.s32 d0, d0
entry:
  %sum = add i32 %a, %b
  %conv = sitofp i32 %sum to float
  ret float %conv
}