From cb01efb7988d119d6e2aedab1740695aa6a9cc0c Mon Sep 17 00:00:00 2001
From: Bob Wilson
Date: Sun, 3 Nov 2013 06:14:38 +0000
Subject: [PATCH] Enable optimization of sin / cos pair into call to
 __sincos_stret for iOS7+.

rdar://12856873

Patch by Evan Cheng, with a fix for rdar://13209539 by Tilmann Scheller

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193942 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 77 ++++++++++++++++++++++++++++++
 lib/Target/ARM/ARMISelLowering.h   |  1 +
 lib/Target/ARM/ARMSubtarget.cpp    |  5 ++
 lib/Target/ARM/ARMSubtarget.h      |  4 ++
 test/CodeGen/ARM/sincos.ll         | 38 +++++++++++++++
 5 files changed, 125 insertions(+)
 create mode 100644 test/CodeGen/ARM/sincos.ll

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2de2dfa0ca7..9dee9314f68 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -869,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
     }
   }
+
+  // Combine sin / cos into one node or libcall if possible.
+  if (Subtarget->hasSinCos()) {
+    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+    setLibcallName(RTLIB::SINCOS_F64, "sincos");
+    if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+      // For iOS, we don't want the normal expansion of a libcall to
+      // sincos. We want to issue a libcall to __sincos_stret.
+      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+    }
+  }
 
   // We have target-specific dag combine patterns for the following nodes:
   // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
@@ -5950,6 +5962,70 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
                      Op.getOperand(1), Op.getOperand(2));
 }
 
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetDarwin());
+
+  // For iOS, we want to call an alternative entry point: __sincos_stret,
+  // return values are passed via sret.
+  SDLoc dl(Op);
+  SDValue Arg = Op.getOperand(0);
+  EVT ArgVT = Arg.getValueType();
+  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Pair of floats / doubles used to pass the result.
+  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+
+  // Create stack object for sret.
+  const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
+  const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+  SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+
+  ArgListTy Args;
+  ArgListEntry Entry;
+
+  Entry.Node = SRet;
+  Entry.Ty = RetTy->getPointerTo();
+  Entry.isSExt = false;
+  Entry.isZExt = false;
+  Entry.isSRet = true;
+  Args.push_back(Entry);
+
+  Entry.Node = Arg;
+  Entry.Ty = ArgTy;
+  Entry.isSExt = false;
+  Entry.isZExt = false;
+  Args.push_back(Entry);
+
+  const char *LibcallName = (ArgVT == MVT::f64)
+    ? "__sincos_stret" : "__sincosf_stret";
+  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+  TargetLowering::
+  CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
+                       false, false, false, false, 0,
+                       CallingConv::C, /*isTailCall=*/false,
+                       /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
+                       Callee, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+                                MachinePointerInfo(), false, false, false, 0);
+
+  // Address of cos field.
+  SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+                            DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+                                MachinePointerInfo(), false, false, false, 0);
+
+  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+                     LoadSin.getValue(0), LoadCos.getValue(0));
+}
+
 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
   // Monotonic load/store is legal for all targets
   if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -6081,6 +6157,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
+  case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
   case ISD::SDIVREM:
   case ISD::UDIVREM:       return LowerDivRem(Op, DAG);
   }
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3c80334dc4a..f195381538b 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -448,6 +448,7 @@ namespace llvm {
                               const ARMSubtarget *ST) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *ST) const;
+    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
 
     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 392b6f590eb..f59e522195c 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -281,6 +281,11 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
   return SchedModel->MispredictPenalty;
 }
 
+bool ARMSubtarget::hasSinCos() const {
+  return getTargetTriple().getOS() == Triple::IOS &&
+    !getTargetTriple().isOSVersionLT(7, 0);
+}
+
 bool ARMSubtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
            TargetSubtargetInfo::AntiDepBreakMode& Mode,
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 0eb0a4463e4..aab93f1d6c0 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -330,6 +330,10 @@ public:
   const std::string & getCPUString() const { return CPUString; }
 
   unsigned getMispredictionPenalty() const;
+
+  /// This function returns true if the target has a sincos() routine in its
+  /// compiler runtime or math libraries.
+  bool hasSinCos() const;
 
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
diff --git a/test/CodeGen/ARM/sincos.ll b/test/CodeGen/ARM/sincos.ll
new file mode 100644
index 00000000000..30b2664e372
--- /dev/null
+++ b/test/CodeGen/ARM/sincos.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios6 -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT
+; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS
+
+; Combine sin / cos into a single call.
+; rdar://12856873
+
+define float @test1(float %x) nounwind {
+entry:
+; SINCOS-LABEL: test1:
+; SINCOS: bl ___sincosf_stret
+
+; NOOPT-LABEL: test1:
+; NOOPT: bl _sinf
+; NOOPT: bl _cosf
+  %call = tail call float @sinf(float %x) nounwind readnone
+  %call1 = tail call float @cosf(float %x) nounwind readnone
+  %add = fadd float %call, %call1
+  ret float %add
+}
+
+define double @test2(double %x) nounwind {
+entry:
+; SINCOS-LABEL: test2:
+; SINCOS: bl ___sincos_stret
+
+; NOOPT-LABEL: test2:
+; NOOPT: bl _sin
+; NOOPT: bl _cos
+  %call = tail call double @sin(double %x) nounwind readnone
+  %call1 = tail call double @cos(double %x) nounwind readnone
+  %add = fadd double %call, %call1
+  ret double %add
+}
+
+declare float @sinf(float) readonly
+declare double @sin(double) readonly
+declare float @cosf(float) readonly
+declare double @cos(double) readonly
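
For reference, a minimal C++ sketch of the source-level pattern this lowering targets (illustrative only, not part of the commit; the file and function names are made up). Assuming the code is compiled for armv7-apple-ios7 or later and the compiler is allowed to treat sinf/cosf as errno-free (e.g. -fno-math-errno), the sin/cos pair below is expected to collapse into a single __sincosf_stret call, mirroring test/CodeGen/ARM/sincos.ll above:

// sincos_pair.cpp -- hypothetical example, not part of the patch.
#include <cmath>

float sum_sin_cos(float x) {
  float s = sinf(x);   // expected to become ISD::FSIN during SelectionDAG building
  float c = cosf(x);   // expected to become ISD::FCOS with the same operand
  return s + c;        // legalization can merge the pair into ISD::FSINCOS,
                       // which LowerFSINCOS turns into a __sincosf_stret call
}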