Change ARM scheduling default to list-hybrid if the target supports floating point instructions (and is not using soft float).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@104307 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-17 12:40:40 +00:00 · 2010-05-21 00:43:17 +00:00 · 2010-05-21 00:43:17 +00:00 · f7d87ee158
commit f7d87ee158
parent b11ac950d6
9 changed files with 18 additions and 15 deletions
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -467,7 +467,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)

  setStackPointerRegisterToSaveRestore(ARM::SP);

-  setSchedulingPreference(Sched::RegPressure);
+  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+    setSchedulingPreference(Sched::RegPressure);
+  else
+    setSchedulingPreference(Sched::Hybrid);

  // FIXME: If-converter should use instruction latency to determine
  // profitability rather than relying on fixed limits.
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@ -24,4 +24,4 @@ declare float @fabsf(float)
 ; CORTEXA8: test:
 ; CORTEXA8: 	vabs.f32	d1, d1
 ; CORTEXA9: test:
-; CORTEXA9: 	vabs.f32	s1, s1
+; CORTEXA9: 	vabs.f32	s0, s0
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vadd.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vadd.f32	s0, s1, s0
+; CORTEXA9: 	vadd.f32	s0, s0, s1
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vdiv.f32	s0, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9: 	vdiv.f32	s0, s1, s0
+; CORTEXA9: 	vdiv.f32	s0, s0, s1
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@ -21,4 +21,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vmul.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vmla.f32	s2, s1, s0
+; CORTEXA9: 	vmla.f32	s0, s1, s2
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@ -21,4 +21,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vnmls.f32	s2, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9: 	vnmls.f32	s2, s1, s0
+; CORTEXA9: 	vnmls.f32	s0, s1, s2
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vmul.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vmul.f32	s0, s1, s0
+; CORTEXA9: 	vmul.f32	s0, s0, s1
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@ -4,7 +4,7 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s

 define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
 entry:
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
@ -13,7 +13,7 @@ entry:
 }

 define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
 entry:
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.

 %struct.int16x8_t = type { <8 x i16> }
@ -45,12 +45,12 @@ define arm_apcscc void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocaptur
 entry:
 ; CHECK:        t2:
 ; CHECK:        vld1.16
-; CHECK-NOT:    vmov
-; CHECK:        vmul.i16
 ; CHECK:        vld1.16
-; CHECK:        vst1.16
 ; CHECK-NOT:    vmov
 ; CHECK:        vmul.i16
+; CHECK:        vmul.i16
+; CHECK-NOT:    vmov
+; CHECK:        vst1.16
 ; CHECK:        vst1.16
  %0 = getelementptr inbounds %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
  %1 = load <8 x i16>* %0, align 16               ; <<8 x i16>> [#uses=1]
@ -238,8 +238,8 @@ bb14:                                             ; preds = %bb6
 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK:        t9:
 ; CHECK:        vldr.64
-; CHECK-NEXT:   vstmia r0, {d0,d1}
-; CHECK-NEXT:   vmov.i8 d1
+; CHECK:        vmov.i8 d1
+; CHECK-NEXT:   vstmia r0, {d2,d3}
 ; CHECK-NEXT:   vstmia r0, {d0,d1}
  %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
  %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]