From f7d87ee1584bffe361b39f8cec7a39131c8c4efc Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 21 May 2010 00:43:17 +0000 Subject: [PATCH] Change ARM scheduling default to list-hybrid if the target supports floating point instructions (and is not using soft float). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@104307 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 5 ++++- test/CodeGen/ARM/fabss.ll | 2 +- test/CodeGen/ARM/fadds.ll | 2 +- test/CodeGen/ARM/fdivs.ll | 2 +- test/CodeGen/ARM/fmacs.ll | 2 +- test/CodeGen/ARM/fmscs.ll | 2 +- test/CodeGen/ARM/fmuls.ll | 2 +- test/CodeGen/ARM/fnmscs.ll | 4 ++-- test/CodeGen/ARM/reg_sequence.ll | 12 ++++++------ 9 files changed, 18 insertions(+), 15 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 340e8870443..c5a6a8e1191 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -467,7 +467,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(ARM::SP); - setSchedulingPreference(Sched::RegPressure); + if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + setSchedulingPreference(Sched::RegPressure); + else + setSchedulingPreference(Sched::Hybrid); // FIXME: If-converter should use instruction latency to determine // profitability rather than relying on fixed limits. diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index f03282bdab7..dfc1e0a957c 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -24,4 +24,4 @@ declare float @fabsf(float) ; CORTEXA8: test: ; CORTEXA8: vabs.f32 d1, d1 ; CORTEXA9: test: -; CORTEXA9: vabs.f32 s1, s1 +; CORTEXA9: vabs.f32 s0, s0 diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 749690e98d0..113f0e29bd1 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vadd.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vadd.f32 s0, s1, s0 +; CORTEXA9: vadd.f32 s0, s0, s1 diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 0c314957929..9af1217de1d 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vdiv.f32 s0, s1, s0 ; CORTEXA9: test: -; CORTEXA9: vdiv.f32 s0, s1, s0 +; CORTEXA9: vdiv.f32 s0, s0, s1 diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll index f8b47b5bac0..c4ceca9828b 100644 --- a/test/CodeGen/ARM/fmacs.ll +++ b/test/CodeGen/ARM/fmacs.ll @@ -21,4 +21,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vmla.f32 s2, s1, s0 +; CORTEXA9: vmla.f32 s0, s1, s2 diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll index 7a70543dee6..103ce334519 100644 --- a/test/CodeGen/ARM/fmscs.ll +++ b/test/CodeGen/ARM/fmscs.ll @@ -21,4 +21,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vnmls.f32 s2, s1, s0 ; CORTEXA9: test: -; CORTEXA9: vnmls.f32 s2, s1, s0 +; CORTEXA9: vnmls.f32 s0, s1, s2 diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index ef4e3e52818..bfafd20c860 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vmul.f32 s0, s1, s0 +; CORTEXA9: vmul.f32 s0, s0, s1 diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 6b7cefa6414..0b47edd5f1f 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s define float @test1(float %acc, float %a, float %b) nounwind { -; CHECK: vnmla.f32 s2, s1, s0 +; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}} entry: %0 = fmul float %a, %b %1 = fsub float -0.0, %0 @@ -13,7 +13,7 @@ entry: } define float @test2(float %acc, float %a, float %b) nounwind { -; CHECK: vnmla.f32 s2, s1, s0 +; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}} entry: %0 = fmul float %a, %b %1 = fmul float -1.0, %0 diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 1a27d4d9397..3ba82ccdfa9 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's. %struct.int16x8_t = type { <8 x i16> } @@ -45,12 +45,12 @@ define arm_apcscc void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocaptur entry: ; CHECK: t2: ; CHECK: vld1.16 -; CHECK-NOT: vmov -; CHECK: vmul.i16 ; CHECK: vld1.16 -; CHECK: vst1.16 ; CHECK-NOT: vmov ; CHECK: vmul.i16 +; CHECK: vmul.i16 +; CHECK-NOT: vmov +; CHECK: vst1.16 ; CHECK: vst1.16 %0 = getelementptr inbounds %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] %1 = load <8 x i16>* %0, align 16 ; <<8 x i16>> [#uses=1] @@ -238,8 +238,8 @@ bb14: ; preds = %bb6 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK: t9: ; CHECK: vldr.64 -; CHECK-NEXT: vstmia r0, {d0,d1} -; CHECK-NEXT: vmov.i8 d1 +; CHECK: vmov.i8 d1 +; CHECK-NEXT: vstmia r0, {d2,d3} ; CHECK-NEXT: vstmia r0, {d0,d1} %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1]