Teach SDISel to combine fsin / fcos into a fsincos node if the following

conditions are met:
1. They share the same operand and are in the same BB.
2. Both outputs are used.
3. The target has a native instruction that maps to ISD::FSINCOS node or
   the target provides a sincos library call.

Implemented the generic optimization in sdisel and enabled it for
Mac OSX. Also added an additional optimization for x86_64 Mac OSX by
using an alternative entry point __sincos_stret which returns the two
results in xmm0 / xmm1.

rdar://13087969
PR13204


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173755 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2013-01-29 02:32:37 +00:00
parent 8232ece5c1
commit 8688a58c53
15 changed files with 248 additions and 24 deletions

View File

@ -455,6 +455,9 @@ namespace ISD {
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
FLOG, FLOG2, FLOG10, FEXP, FEXP2, FLOG, FLOG2, FLOG10, FEXP, FEXP2,
FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR, FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR,
/// FSINCOS - Compute both fsin and fcos as a single operation.
FSINCOS,
/// LOAD and STORE have token chains as their first operand, then the same /// LOAD and STORE have token chains as their first operand, then the same
/// operands as an LLVM load/store instruction, then an offset node that /// operands as an LLVM load/store instruction, then an offset node that

View File

@ -158,6 +158,11 @@ namespace RTLIB {
COS_F80, COS_F80,
COS_F128, COS_F128,
COS_PPCF128, COS_PPCF128,
SINCOS_F32,
SINCOS_F64,
SINCOS_F80,
SINCOS_F128,
SINCOS_PPCF128,
POW_F32, POW_F32,
POW_F64, POW_F64,
POW_F80, POW_F80,

View File

@ -102,7 +102,8 @@ private:
SDNode *Node, bool isSigned); SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I16,
@ -110,6 +111,7 @@ private:
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128); RTLIB::Libcall Call_I128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandBUILD_VECTOR(SDNode *Node);
@ -2095,6 +2097,106 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Results.push_back(Rem); Results.push_back(Rem);
} }
/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
case MVT::f80: LC = RTLIB::SINCOS_F80; break;
case MVT::f128: LC = RTLIB::SINCOS_F128; break;
case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
}
return TLI.getLibcallName(LC) != 0;
}
/// useSinCos - Only issue sincos libcall if both sin and cos are
/// needed.
static bool useSinCos(SDNode *Node) {
unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
? ISD::FCOS : ISD::FSIN;
SDValue Op0 = Node->getOperand(0);
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User == Node)
continue;
// The other user might have been turned into sincos already.
if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
return true;
}
return false;
}
/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos
/// pairs.
void
SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
case MVT::f80: LC = RTLIB::SINCOS_F80; break;
case MVT::f128: LC = RTLIB::SINCOS_F128; break;
case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
}
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
SDValue InChain = DAG.getEntryNode();
EVT RetVT = Node->getValueType(0);
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
// Pass the argument.
Entry.Node = Node->getOperand(0);
Entry.Ty = RetTy;
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
// Pass the return address of sin.
SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = SinPtr;
Entry.Ty = RetTy->getPointerTo();
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
// Also pass the return address of the cos.
SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = CosPtr;
Entry.Ty = RetTy->getPointerTo();
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
DebugLoc dl = Node->getDebugLoc();
TargetLowering::
CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
MachinePointerInfo(), false, false, false, 0));
Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr,
MachinePointerInfo(), false, false, false, 0));
}
/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that /// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are /// we expand it. At this point, we know that the result and operand types are
@ -3041,14 +3143,33 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::SQRT_PPCF128)); RTLIB::SQRT_PPCF128));
break; break;
case ISD::FSIN: case ISD::FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, case ISD::FCOS: {
RTLIB::SIN_F80, RTLIB::SIN_F128, EVT VT = Node->getValueType(0);
RTLIB::SIN_PPCF128)); bool isSIN = Node->getOpcode() == ISD::FSIN;
// Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
// fcos which share the same operand and both are used.
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
isSinCosLibcallAvailable(Node, TLI))
&& useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
if (!isSIN)
Tmp1 = Tmp1.getValue(1);
Results.push_back(Tmp1);
} else if (isSIN) {
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
RTLIB::SIN_F80, RTLIB::SIN_F128,
RTLIB::SIN_PPCF128));
} else {
Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
RTLIB::COS_F80, RTLIB::COS_F128,
RTLIB::COS_PPCF128));
}
break; break;
case ISD::FCOS: }
Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, case ISD::FSINCOS:
RTLIB::COS_F80, RTLIB::COS_F128, // Expand into sincos libcall.
RTLIB::COS_PPCF128)); ExpandSinCosLibCall(Node, Results);
break; break;
case ISD::FLOG: case ISD::FLOG:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
@ -3181,7 +3302,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::UREM: case ISD::UREM:
case ISD::SREM: { case ISD::SREM: {
EVT VT = Node->getValueType(0); EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
bool isSigned = Node->getOpcode() == ISD::SREM; bool isSigned = Node->getOpcode() == ISD::SREM;
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
@ -3192,6 +3312,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If div is legal, it's better to do the normal expansion // If div is legal, it's better to do the normal expansion
!TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
useDivRem(Node, isSigned, false))) { useDivRem(Node, isSigned, false))) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y // X % Y -> X-X/Y*Y

View File

@ -140,6 +140,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FSQRT: return "fsqrt"; case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin"; case ISD::FSIN: return "fsin";
case ISD::FCOS: return "fcos"; case ISD::FCOS: return "fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc"; case ISD::FTRUNC: return "ftrunc";
case ISD::FFLOOR: return "ffloor"; case ISD::FFLOOR: return "ffloor";
case ISD::FCEIL: return "fceil"; case ISD::FCEIL: return "fceil";

View File

@ -340,6 +340,13 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
// These are generally not available.
Names[RTLIB::SINCOS_F32] = 0;
Names[RTLIB::SINCOS_F64] = 0;
Names[RTLIB::SINCOS_F80] = 0;
Names[RTLIB::SINCOS_F128] = 0;
Names[RTLIB::SINCOS_PPCF128] = 0;
} }
/// InitLibcallCallingConvs - Set default libcall CallingConvs. /// InitLibcallCallingConvs - Set default libcall CallingConvs.

View File

@ -781,6 +781,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&

View File

@ -1364,6 +1364,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);

View File

@ -81,6 +81,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FLOG, MVT::f32, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand);

View File

@ -421,6 +421,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand);

View File

@ -132,11 +132,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We don't support sin/cos/sqrt/fmod/pow // We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FMA , MVT::f32, Legal);

View File

@ -759,10 +759,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand);

View File

@ -605,10 +605,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod // We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special // Expand FP immediates into loads from the stack, except for the special
// cases we handle. // cases we handle.
@ -633,8 +635,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod // We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants. // Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps addLegalFPImmediate(APFloat(+0.0f)); // xorps
@ -644,8 +647,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) { if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
} }
} else if (!TM.Options.UseSoftFloat) { } else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87. // f32 and f64 in x87.
@ -659,10 +663,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) { if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f32 , Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32 , Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
} }
addLegalFPImmediate(APFloat(+0.0)); // FLD0 addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1 addLegalFPImmediate(APFloat(+1.0)); // FLD1
@ -699,8 +705,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
} }
if (!TM.Options.UnsafeFPMath) { if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand); setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand); setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
} }
setOperationAction(ISD::FFLOOR, MVT::f80, Expand); setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
@ -748,7 +755,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand);
@ -1281,6 +1290,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setLibcallName(RTLIB::SRA_I128, 0); setLibcallName(RTLIB::SRA_I128, 0);
} }
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget->isTargetDarwin() && Subtarget->is64Bit()) {
// For MacOSX, we don't want to the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret to avoid memory
// traffic.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
}
// We have target-specific dag combine patterns for the following nodes: // We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
@ -12014,6 +12036,50 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2)); Op.getOperand(1), Op.getOperand(2));
} }
SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
// which returns the values in two XMM registers.
DebugLoc dl = Op.getDebugLoc();
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
const char *LibcallName = (ArgVT == MVT::f64)
? "__sincos_stret" : "__sincosf_stret";
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
TargetLowering::
CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
false, false, false, false, 0,
CallingConv::C, /*isTaillCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed*/true,
Callee, Args, DAG, dl);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
#if 1
return CallResult.first;
#else
SDValue RetSin = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(0));
SDValue RetCos = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(1));
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, RetSin, RetCos);
#endif
}
/// LowerOperation - Provide custom lowering hooks for some operations. /// LowerOperation - Provide custom lowering hooks for some operations.
/// ///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@ -12096,6 +12162,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD: return LowerADD(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
} }
} }

View File

@ -838,8 +838,8 @@ namespace llvm {
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;

View File

@ -155,6 +155,11 @@ const char *X86Subtarget::getBZeroEntry() const {
return 0; return 0;
} }
bool X86Subtarget::hasSinCos() const {
return getTargetTriple().isMacOSX() &&
!getTargetTriple().isMacOSXVersionLT(10, 9);
}
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address. /// to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {

View File

@ -328,6 +328,10 @@ public:
/// memset with zero passed as the second argument. Otherwise it /// memset with zero passed as the second argument. Otherwise it
/// returns null. /// returns null.
const char *getBZeroEntry() const; const char *getBZeroEntry() const;
/// This function returns true if the target has sincos() routine in its
/// compiler runtime or math libraries.
bool hasSinCos() const;
/// enablePostRAScheduler - run for Atom optimization. /// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,