From fcba5e6b645df89ae6b93911fe0f80b08fa6b44c Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 11 Aug 2010 15:44:15 +0000 Subject: [PATCH] cortex m4 has floating point support, but only single precision. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110810 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 4 +++- lib/Target/ARM/ARMISelLowering.cpp | 3 ++- lib/Target/ARM/ARMSubtarget.cpp | 1 + lib/Target/ARM/ARMSubtarget.h | 5 +++++ test/CodeGen/Thumb2/cortex-fp.ll | 24 ++++++++++++++++++++++++ 5 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/Thumb2/cortex-fp.ll diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index b9310bbb9f6..0791e679276 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -40,6 +40,8 @@ def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", "Has data barrier (dmb / dsb) instructions">; def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports single precision only">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -155,7 +157,7 @@ def : Processor<"cortex-a9", CortexA9Itineraries, // V7M Processors. def : ProcNoItin<"cortex-m3", [ArchV7M]>; -def : ProcNoItin<"cortex-m4", [ArchV7M]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e4b556228fc..073528abe8d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -266,7 +266,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::GPRRegisterClass); if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); - addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + if (!Subtarget->isFPOnlySP()) + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); setTruncStoreAction(MVT::f64, MVT::f32, Expand); } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b4eb83e9060..cb539f4c01e 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -45,6 +45,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) + , FPOnlySP(false) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ad9fc11b201..67e58038ee7 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -95,6 +95,10 @@ protected: /// over 16-bit ones. bool Pref32BitThumb; + /// FPOnlySP - If true, the floating point unit only supports single + /// precision. + bool FPOnlySP; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -151,6 +155,7 @@ protected: bool hasDataBarrier() const { return HasDataBarrier; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool isFPBrccSlow() const { return SlowFPBrcc; } + bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool hasFP16() const { return HasFP16; } diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll new file mode 100644 index 00000000000..ba891d081ed --- /dev/null +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CORTEXM3 +; RUN: llc < %s -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CORTEXM4 +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 + + +define float @foo(float %a, float %b) { +entry: +; CHECK: foo +; CORTEXM3: blx ___mulsf3 +; CORTEXM4: vmul.f32 s0, s1, s0 +; CORTEXA8: vmul.f32 d0, d1, d0 + %0 = fmul float %a, %b + ret float %0 +} + +define double @bar(double %a, double %b) { +entry: +; CHECK: bar + %0 = fmul double %a, %b +; CORTEXM3: blx ___muldf3 +; CORTEXM4: blx ___muldf3 +; CORTEXA8: vmul.f64 d0, d1, d0 + ret double %0 +}