From 6606ef0e98855e9e46404241eedebacb3b424976 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 5 Dec 2013 17:56:02 +0000 Subject: [PATCH] MI-Sched: Model "reserved" processor resources. This allows a target to use MI-Sched as an in-order scheduler that will model strict resource conflicts without defining a processor itinerary. Instead, the target can now use the new per-operand machine model and define in-order resources with BufferSize=0. For example, this would allow restricting the type of operations that can be formed into a dispatch group. (Normally NumMicroOps is sufficient to enforce dispatch groups). If the intent is to model latency in in-order pipeline, as opposed to resource conflicts, then a resource with BufferSize=1 should be defined instead. This feature is only casually tested as there are no in-tree targets using it yet. However, Hal will be experimenting with POWER7. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196517 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ScheduleDAG.h | 21 +++---- lib/CodeGen/MachineScheduler.cpp | 93 ++++++++++++++++++++++++------ lib/CodeGen/ScheduleDAGInstrs.cpp | 8 ++- 3 files changed, 92 insertions(+), 30 deletions(-) diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 48ed232a15c..66bf8c5dd77 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -292,7 +292,8 @@ namespace llvm { bool isScheduleHigh : 1; // True if preferable to schedule high. bool isScheduleLow : 1; // True if preferable to schedule low. bool isCloned : 1; // True if this node has been cloned. - bool isUnbuffered : 1; // Reads an unbuffered resource. + bool isUnbuffered : 1; // Uses an unbuffered resource. + bool hasReservedResource : 1; // Uses a reserved resource. Sched::Preference SchedulingPref; // Scheduling preference. 
private: @@ -318,9 +319,9 @@ namespace llvm { hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), isScheduleHigh(false), isScheduleLow(false), isCloned(false), isUnbuffered(false), - SchedulingPref(Sched::None), isDepthCurrent(false), - isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), - BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} + hasReservedResource(false), SchedulingPref(Sched::None), + isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), + TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// SUnit - Construct an SUnit for post-regalloc scheduling to represent /// a MachineInstr. @@ -333,9 +334,9 @@ namespace llvm { hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), isScheduleHigh(false), isScheduleLow(false), isCloned(false), isUnbuffered(false), - SchedulingPref(Sched::None), isDepthCurrent(false), - isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), - BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} + hasReservedResource(false), SchedulingPref(Sched::None), + isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), + TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// SUnit - Construct a placeholder SUnit. 
SUnit() @@ -347,9 +348,9 @@ namespace llvm { hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), isScheduleHigh(false), isScheduleLow(false), isCloned(false), isUnbuffered(false), - SchedulingPref(Sched::None), isDepthCurrent(false), - isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), - BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} + hasReservedResource(false), SchedulingPref(Sched::None), + isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), + TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// \brief Boundary nodes are placeholders for the boundary of the /// scheduling region. diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 3296149afa8..6cfedcbbc2e 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1322,6 +1322,8 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { // GenericScheduler - Implementation of the generic MachineSchedStrategy. //===----------------------------------------------------------------------===// +static const unsigned InvalidCycle = ~0U; + namespace { /// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. @@ -1491,6 +1493,10 @@ public: // Is the scheduled region resource limited vs. latency limited. bool IsResourceLimited; + // Record the highest cycle at which each resource has been reserved by a + // scheduled instruction. + SmallVector<unsigned, 16> ReservedCycles; + #ifndef NDEBUG // Remember the greatest operand latency as an upper bound on the number of // times we should retry the pending queue because of a hazard. @@ -1518,6 +1524,7 @@ public: MaxExecutedResCount = 0; ZoneCritResIdx = 0; IsResourceLimited = false; + ReservedCycles.clear(); #ifndef NDEBUG MaxObservedLatency = 0; #endif @@ -1587,6 +1594,8 @@ public: /// cycle.
unsigned getLatencyStallCycles(SUnit *SU); + unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles); + bool checkHazard(SUnit *SU); unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); @@ -1708,8 +1717,10 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { DAG = dag; SchedModel = smodel; Rem = rem; - if (SchedModel->hasInstrSchedModel()) + if (SchedModel->hasInstrSchedModel()) { ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); + ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); + } } /// Initialize the per-region scheduling policy. @@ -1890,6 +1901,20 @@ unsigned GenericScheduler::SchedBoundary::getLatencyStallCycles(SUnit *SU) { return 0; } +/// Compute the next cycle at which the given processor resource can be +/// scheduled. +unsigned GenericScheduler::SchedBoundary:: +getNextResourceCycle(unsigned PIdx, unsigned Cycles) { + unsigned NextUnreserved = ReservedCycles[PIdx]; + // If this resource has never been used, always return cycle zero. + if (NextUnreserved == InvalidCycle) + return 0; + // For bottom-up scheduling add the cycles needed for the current operation. + if (!isTop()) + NextUnreserved += Cycles; + return NextUnreserved; +} + /// Does this SU have a hazard within the current instruction group. /// /// The scheduler supports two modes of hazard recognition.
The first is the @@ -1913,6 +1938,15 @@ bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; } + if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle) + return true; + } + } return false; } @@ -2097,7 +2131,7 @@ void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. unsigned GenericScheduler::SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { +countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; DEBUG(dbgs() << " " << getResourceName(PIdx) @@ -2116,8 +2150,14 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { << getResourceName(PIdx) << ": " << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); } - // TODO: We don't yet model reserved resources. It's not hard though. - return CurrCycle; + // For reserved resources, record the highest cycle using the resource. + unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); + if (NextAvailable > CurrCycle) { + DEBUG(dbgs() << " Resource conflict: " + << SchedModel->getProcResource(PIdx)->Name << " reserved until @" + << NextAvailable << "\n"); + } + return NextAvailable; } /// Move the boundary of scheduled code by one SUnit. @@ -2131,25 +2171,17 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { } HazardRec->EmitInstruction(SU); } + // checkHazard should prevent scheduling multiple instructions per cycle that + // exceed the issue width. 
const MCSchedClassDesc *SC = DAG->getSchedClass(SU); unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); - CurrMOps += IncMOps; - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. - // - // TODO: Also check if this SU must end a dispatch group. - unsigned NextCycle = CurrCycle; - if (CurrMOps >= SchedModel->getIssueWidth()) { - ++NextCycle; - DEBUG(dbgs() << " *** Max MOps " << CurrMOps - << " at cycle " << CurrCycle << '\n'); - } + assert((CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && + "Cannot schedule this instruction's MicroOps in the current cycle."); + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + unsigned NextCycle = CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { case 0: assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); @@ -2194,10 +2226,23 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); + countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); if (RCycle > NextCycle) NextCycle = RCycle; } + if (SU->hasReservedResource) { + // For reserved resources, record the highest cycle using the resource. + // For top-down scheduling, this is the cycle in which we schedule this + // instruction plus the number of cycles the operation reserves the + // resource. For bottom-up it is simply the instruction's cycle. 
+ for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + if (SchedModel->getProcResource(PIdx)->BufferSize == 0) + ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle; + } + } } // Update ExpectedLatency and DependentLatency. unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; @@ -2224,6 +2269,16 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) > (int)LFactor; } + // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle + // resets CurrMOps. Loop to handle instructions with more MOps than issue in + // one cycle. Since we commonly reach the max MOps here, opportunistically + // bump the cycle to avoid uselessly checking everything in the readyQ. + CurrMOps += IncMOps; + while (CurrMOps >= SchedModel->getIssueWidth()) { + bumpCycle(++NextCycle); + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + } DEBUG(dumpScheduledState()); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index eeae6ec03d8..977b8f0b41d 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -697,9 +697,15 @@ void ScheduleDAGInstrs::initSUnits() { for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC), PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - if (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize == 1) { + switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + case 0: + SU->hasReservedResource = true; + break; + case 1: SU->isUnbuffered = true; break; + default: + break; } } }