diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index fa78ffcbde0..cff97cdb3be 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -83,34 +83,38 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>; -def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"tonga", 
SI_Itin, [FeatureVolcanicIslands]>; +//===----------------------------------------------------------------------===// +// Volcanic Islands +//===----------------------------------------------------------------------===// -def : Proc<"iceland", SI_Itin, [FeatureVolcanicIslands]>; +def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>; -def : Proc<"carrizo", SI_Itin, [FeatureVolcanicIslands]>; +def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; + +def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index ff8db67a619..99a1df36c1f 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -68,6 +68,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : // Most instructions require adjustments after selection to satisfy // operand requirements. let hasPostISelHook = 1; + let SchedRW = [Write32Bit]; } class Enc32 { @@ -214,9 +215,9 @@ class SMRDe <bits<5> op, bits<1> imm> : Enc32 { let Inst{31-27} = 0x18; //encoding } +let SchedRW = [WriteSALU] in { class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> : InstSI<outs, ins, asm, pattern> { - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -274,6 +275,8 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> : let UseNamedOperandTable = 1; } +} // let SchedRW = [WriteSALU] + class SMRD <dag outs, dag ins, string asm, list<dag> pattern> : InstSI<outs, ins, asm, pattern> { @@ -283,6 +286,7 @@ class SMRD <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 1; let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteSMEM]; } //===----------------------------------------------------------------------===// @@ -588,6 +592,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> : let DS = 1; let UseNamedOperandTable = 1; let DisableEncoding = "$m0"; 
+ let SchedRW = [WriteLDS]; } class DS_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : @@ -602,6 +607,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteVMEM]; } class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> : @@ -613,6 +619,7 @@ class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteVMEM]; } class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : @@ -641,5 +648,4 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : } - } // End Uses = [EXEC] diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 980dba79bb8..dd0ee407d3e 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1191,6 +1191,8 @@ defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>; let Uses = [EXEC] in { +// FIXME: Specify SchedRW for READFIRSTLANE_B32 + def V_READFIRSTLANE_B32 : VOP1 < 0x00000002, (outs SReg_32:$vdst), @@ -1201,6 +1203,8 @@ def V_READFIRSTLANE_B32 : VOP1 < } +let SchedRW = [WriteQuarterRate32] in { + defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64", VOP_I32_F64, fp_to_sint >; @@ -1253,6 +1257,9 @@ defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64", defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32", VOP_F64_I32, uint_to_fp >; + +} // let SchedRW = [WriteQuarterRate32] + defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32", VOP_F32_F32, AMDGPUfract >; @@ -1271,6 +1278,9 @@ defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32", defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32", VOP_F32_F32, fexp2 >; + +let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32", VOP_F32_F32, flog2 >; @@ -1283,18 +1293,32 @@ defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, 
"v_rcp_iflag_f32", defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32", VOP_F32_F32, AMDGPUrsq >; + +} //let SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { + defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64", VOP_F64_F64, AMDGPUrcp >; defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64", VOP_F64_F64, AMDGPUrsq >; + +} // let SchedRW = [WriteDouble]; + defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32", VOP_F32_F32, fsqrt >; + +let SchedRW = [WriteDouble] in { + defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64", VOP_F64_F64, fsqrt >; + +} // let SchedRW = [WriteDouble] + defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32", VOP_F32_F32, AMDGPUsin >; @@ -1323,6 +1347,8 @@ defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32> // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { +let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>; defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>; defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>; @@ -1332,17 +1358,25 @@ defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32", defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy >; + +} // End let SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { + defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>; defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamped >; +} // End SchedRW = [WriteDouble] + } // End SubtargetPredicate = isSICI //===----------------------------------------------------------------------===// // VINTRP Instructions //===----------------------------------------------------------------------===// +// FIXME: Specify SchedRW for VINTRP instructions.
defm V_INTERP_P1_F32 : VINTRP_m < 0x00000000, "v_interp_p1_f32", (outs VGPR_32:$dst), @@ -1656,11 +1690,15 @@ defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32", defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; + +let SchedRW = [WriteDouble] in { + defm V_DIV_FIXUP_F64 : VOP3Inst < vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup >; -// Only on SI +} // let SchedRW = [WriteDouble] + defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32, shl >; @@ -1675,6 +1713,7 @@ defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32, sra >; +let SchedRW = [WriteDouble] in { let isCommutable = 1 in { defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64", @@ -1697,7 +1736,9 @@ defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -let isCommutable = 1 in { +} // let SchedRW = [WriteDouble] + +let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32", VOP_I32_I32_I32 @@ -1713,30 +1754,37 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32", VOP_I32_I32_I32 >; -} // isCommutable = 1 +} // isCommutable = 1, SchedRW = [WriteQuarterRate32] defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>; +let SchedRW = [WriteDouble] in { // Double precision division pre-scale. 
defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>; +} // let SchedRW = [WriteDouble] let isCommutable = 1 in { defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fmas >; +let SchedRW = [WriteDouble] in { defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fmas >; +} // End SchedRW = [WriteDouble] } // End isCommutable = 1 //def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>; +let SchedRW = [WriteDouble] in { defm V_TRIG_PREOP_F64 : VOP3Inst < vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop >; +} // let SchedRW = [WriteDouble] + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td index 28b65b82585..9b1f676020b 100644 --- a/lib/Target/R600/SISchedule.td +++ b/lib/Target/R600/SISchedule.td @@ -7,9 +7,85 @@ // //===----------------------------------------------------------------------===// // -// TODO: This is just a place holder for now. 
+// MachineModel definitions for Southern Islands (SI) // //===----------------------------------------------------------------------===// +def WriteBranch : SchedWrite; +def WriteExport : SchedWrite; +def WriteLDS : SchedWrite; +def WriteSALU : SchedWrite; +def WriteSMEM : SchedWrite; +def WriteVMEM : SchedWrite; -def SI_Itin : ProcessorItineraries <[], [], []>; +// Vector ALU instructions +def Write32Bit : SchedWrite; +def WriteQuarterRate32 : SchedWrite; + +def WriteFloatFMA : SchedWrite; + +def WriteDouble : SchedWrite; +def WriteDoubleAdd : SchedWrite; + +def SIFullSpeedModel : SchedMachineModel; +def SIQuarterSpeedModel : SchedMachineModel; + +// BufferSize = 0 means the processors are in-order. +let BufferSize = 0 in { + +// XXX: Are the resource counts correct? +def HWBranch : ProcResource<1>; +def HWExport : ProcResource<7>; // Taken from S_WAITCNT +def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT +def HWSALU : ProcResource<1>; +def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT +def HWVALU : ProcResource<1>; + +} + +class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources, + int latency> : WriteRes<write, resources> { + let Latency = latency; +} + +class HWVALUWriteRes<SchedWrite write, int latency> : + HWWriteRes<write, [HWVALU], latency>; + + +// The latency numbers are taken from AMD Accelerated Parallel Processing +// guide. They may not be accurate. + +// The latency values are 1 / (operations / cycle) / 4. +multiclass SICommonWriteRes { + + def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ??? + def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ??? + def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64 + def : HWWriteRes<WriteSALU, [HWSALU], 1>; + def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
+ def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600 + + def : HWVALUWriteRes<Write32Bit, 1>; + def : HWVALUWriteRes<WriteQuarterRate32, 4>; +} + + +let SchedModel = SIFullSpeedModel in { + +defm : SICommonWriteRes; + +def : HWVALUWriteRes<WriteFloatFMA, 1>; +def : HWVALUWriteRes<WriteDouble, 4>; +def : HWVALUWriteRes<WriteDoubleAdd, 2>; + +} // End SchedModel = SIFullSpeedModel + +let SchedModel = SIQuarterSpeedModel in { + +defm : SICommonWriteRes; + +def : HWVALUWriteRes<WriteFloatFMA, 16>; +def : HWVALUWriteRes<WriteDouble, 16>; +def : HWVALUWriteRes<WriteDoubleAdd, 8>; + +} // End SchedModel = SIQuarterSpeedModel