diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 923b3dab0f8..3fd576ea25a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3344,6 +3344,16 @@ def SSE_SQRTSD : OpndItins< >; } +let Sched = WriteFRsqrt in { +def SSE_RSQRTPS : OpndItins< + IIC_SSE_RSQRTPS_RR, IIC_SSE_RSQRTPS_RM +>; + +def SSE_RSQRTSS : OpndItins< + IIC_SSE_RSQRTSS_RR, IIC_SSE_RSQRTSS_RM +>; +} + let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM @@ -3622,10 +3632,10 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>, +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>; + int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>; defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 7bb3569ad33..73a32304302 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -129,6 +129,7 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // 10-14 cycles. defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index 83f053425aa..eca65c2892b 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -117,6 +117,7 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // 10-14 cycles. defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index a1c21b8711b..a261356afe6 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -63,12 +63,13 @@ def WriteZero : SchedWrite; defm WriteJump : X86SchedWritePair; // Floating point. This covers both scalar and vector operations. -defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. -defm WriteFMul : X86SchedWritePair; // Floating point multiplication. -defm WriteFDiv : X86SchedWritePair; // Floating point division. -defm WriteFSqrt : X86SchedWritePair; // Floating point square root. -defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. -defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. +defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. @@ -314,6 +315,11 @@ def IIC_SSE_SQRTPD_RM : InstrItinClass; def IIC_SSE_SQRTSD_RR : InstrItinClass; def IIC_SSE_SQRTSD_RM : InstrItinClass; +def IIC_SSE_RSQRTPS_RR : InstrItinClass; +def IIC_SSE_RSQRTPS_RM : InstrItinClass; +def IIC_SSE_RSQRTSS_RR : InstrItinClass; +def IIC_SSE_RSQRTSS_RM : InstrItinClass; + def IIC_SSE_RCPP_RR : InstrItinClass; def IIC_SSE_RCPP_RM : InstrItinClass; def IIC_SSE_RCPS_RR : InstrItinClass; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index c8820aa2d8d..4c559c9c179 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -224,6 +224,11 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index ab2c520ea89..ce1ece34e43 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -163,15 +163,15 @@ defm : JWriteResIntPair; // FIXME: should we bother splitting JFPU pipe + unit stages for fast instructions? // FIXME: Double precision latencies // FIXME: SS vs PS latencies -// FIXME: RSQRT latencies // FIXME: ymm latencies //////////////////////////////////////////////////////////////////////////////// -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; def : WriteRes { diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index 90d85878812..f95d4fa0417 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -101,6 +101,7 @@ def : WriteRes { // Scalar and vector floating point. defm : SMWriteResPair; defm : SMWriteResPair; +defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair;