MI-Sched: Model "reserved" processor resources.

This allows a target to use MI-Sched as an in-order scheduler that
will model strict resource conflicts without defining a processor
itinerary. Instead, the target can now use the new per-operand machine
model and define in-order resources with BufferSize=0. For example,
this would allow restricting the types of operations that can be
formed into a dispatch group. (Normally, NumMicroOps is sufficient to
enforce dispatch groups.)
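
For illustration only (this sketch is not part of the commit, and the
resource and write names below are invented), a target could declare
such a reserved in-order resource in its TableGen scheduling model
roughly as follows, with the surrounding SchedMachineModel boilerplate
omitted:

  // Hypothetical dispatch-group resource; BufferSize = 0 marks it as
  // reserved, so the scheduler models strict occupancy conflicts on it.
  def MyDispatchGroup : ProcResource<1> {
    let BufferSize = 0;
  }

  // Hypothetical write type that occupies the dispatch group for 3 cycles.
  def WriteMyCrackedOp : SchedWrite;
  def : WriteRes<WriteMyCrackedOp, [MyDispatchGroup]> {
    let ResourceCycles = [3];
  }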

If the intent is to model latency in an in-order pipeline, as opposed
to resource conflicts, then a resource with BufferSize=1 should be
defined instead.
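
For contrast, the latency-oriented alternative mentioned above might
look roughly like this (again only a sketch with invented names):

  // Hypothetical in-order unit; BufferSize = 1 marks it as unbuffered,
  // so stalls are modeled through latency rather than strict reservation.
  def MyFPU : ProcResource<1> {
    let BufferSize = 1;
  }

  def WriteMyFPOp : SchedWrite;
  def : WriteRes<WriteMyFPOp, [MyFPU]> {
    let Latency = 6;
  }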

This feature is only casually tested, as there are no in-tree targets
using it yet. However, Hal will be experimenting with it on POWER7.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196517 91177308-0d34-0410-b5e6-96231b3b80d8
Andrew Trick 2013-12-05 17:56:02 +00:00
parent 573931394f
commit 6606ef0e98
3 changed files with 92 additions and 30 deletions


@@ -292,7 +292,8 @@ namespace llvm {
     bool isScheduleHigh : 1;          // True if preferable to schedule high.
     bool isScheduleLow : 1;           // True if preferable to schedule low.
     bool isCloned : 1;                // True if this node has been cloned.
-    bool isUnbuffered : 1;            // Reads an unbuffered resource.
+    bool isUnbuffered : 1;            // Uses an unbuffered resource.
+    bool hasReservedResource : 1;     // Uses a reserved resource.
     Sched::Preference SchedulingPref; // Scheduling preference.

   private:
@@ -318,9 +319,9 @@ namespace llvm {
       hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
       isAvailable(false), isScheduled(false), isScheduleHigh(false),
       isScheduleLow(false), isCloned(false), isUnbuffered(false),
-      SchedulingPref(Sched::None), isDepthCurrent(false),
-      isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
-      BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+      hasReservedResource(false), SchedulingPref(Sched::None),
+      isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+      TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}

     /// SUnit - Construct an SUnit for post-regalloc scheduling to represent
     /// a MachineInstr.
@@ -333,9 +334,9 @@ namespace llvm {
       hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
       isAvailable(false), isScheduled(false), isScheduleHigh(false),
       isScheduleLow(false), isCloned(false), isUnbuffered(false),
-      SchedulingPref(Sched::None), isDepthCurrent(false),
-      isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
-      BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+      hasReservedResource(false), SchedulingPref(Sched::None),
+      isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+      TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}

     /// SUnit - Construct a placeholder SUnit.
     SUnit()
@@ -347,9 +348,9 @@ namespace llvm {
       hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
       isAvailable(false), isScheduled(false), isScheduleHigh(false),
       isScheduleLow(false), isCloned(false), isUnbuffered(false),
-      SchedulingPref(Sched::None), isDepthCurrent(false),
-      isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
-      BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+      hasReservedResource(false), SchedulingPref(Sched::None),
+      isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+      TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}

     /// \brief Boundary nodes are placeholders for the boundary of the
     /// scheduling region.


@@ -1322,6 +1322,8 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
 // GenericScheduler - Implementation of the generic MachineSchedStrategy.
 //===----------------------------------------------------------------------===//

+static const unsigned InvalidCycle = ~0U;
+
 namespace {
 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
 /// the schedule.
@@ -1491,6 +1493,10 @@ public:
     // Is the scheduled region resource limited vs. latency limited.
     bool IsResourceLimited;

+    // Record the highest cycle at which each resource has been reserved by a
+    // scheduled instruction.
+    SmallVector<unsigned, 16> ReservedCycles;
+
 #ifndef NDEBUG
     // Remember the greatest operand latency as an upper bound on the number of
     // times we should retry the pending queue because of a hazard.
@@ -1518,6 +1524,7 @@ public:
       MaxExecutedResCount = 0;
       ZoneCritResIdx = 0;
       IsResourceLimited = false;
+      ReservedCycles.clear();
 #ifndef NDEBUG
       MaxObservedLatency = 0;
 #endif
@@ -1587,6 +1594,8 @@ public:
     /// cycle.
     unsigned getLatencyStallCycles(SUnit *SU);

+    unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+
     bool checkHazard(SUnit *SU);

     unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
@@ -1708,8 +1717,10 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
   DAG = dag;
   SchedModel = smodel;
   Rem = rem;
-  if (SchedModel->hasInstrSchedModel())
+  if (SchedModel->hasInstrSchedModel()) {
     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+  }
 }

 /// Initialize the per-region scheduling policy.
@@ -1890,6 +1901,20 @@ unsigned GenericScheduler::SchedBoundary::getLatencyStallCycles(SUnit *SU) {
   return 0;
 }

+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+unsigned GenericScheduler::SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[PIdx];
+  // If this resource has never been used, always return cycle zero.
+  if (NextUnreserved == InvalidCycle)
+    return 0;
+  // For bottom-up scheduling add the cycles needed for the current operation.
+  if (!isTop())
+    NextUnreserved += Cycles;
+  return NextUnreserved;
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@@ -1913,6 +1938,15 @@ bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
     return true;
   }
+  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+        return true;
+    }
+  }

   return false;
 }
@@ -2097,7 +2131,7 @@ void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
 /// \return the next cycle at which the instruction may execute without
 /// oversubscribing resources.
 unsigned GenericScheduler::SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
   unsigned Factor = SchedModel->getResourceFactor(PIdx);
   unsigned Count = Factor * Cycles;
   DEBUG(dbgs() << "  " << getResourceName(PIdx)
@@ -2116,8 +2150,14 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
           << getResourceName(PIdx) << ": "
           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
   }
-  // TODO: We don't yet model reserved resources. It's not hard though.
-  return CurrCycle;
+  // For reserved resources, record the highest cycle using the resource.
+  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  if (NextAvailable > CurrCycle) {
+    DEBUG(dbgs() << "  Resource conflict: "
+          << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+          << NextAvailable << "\n");
+  }
+  return NextAvailable;
 }

 /// Move the boundary of scheduled code by one SUnit.
@@ -2131,25 +2171,17 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
     }
     HazardRec->EmitInstruction(SU);
   }
+  // checkHazard should prevent scheduling multiple instructions per cycle that
+  // exceed the issue width.
   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
-  CurrMOps += IncMOps;
-
-  // checkHazard prevents scheduling multiple instructions per cycle that exceed
-  // issue width. However, we commonly reach the maximum. In this case
-  // opportunistically bump the cycle to avoid uselessly checking everything in
-  // the readyQ. Furthermore, a single instruction may produce more than one
-  // cycle's worth of micro-ops.
-  //
-  // TODO: Also check if this SU must end a dispatch group.
-  unsigned NextCycle = CurrCycle;
-  if (CurrMOps >= SchedModel->getIssueWidth()) {
-    ++NextCycle;
-    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
-          << " at cycle " << CurrCycle << '\n');
-  }
+  assert(CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth() &&
+         "Cannot scheduling this instructions MicroOps in the current cycle.");
+
   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
+
+  unsigned NextCycle = CurrCycle;
   switch (SchedModel->getMicroOpBufferSize()) {
   case 0:
     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
@@ -2194,10 +2226,23 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
            PI = SchedModel->getWriteProcResBegin(SC),
            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
       unsigned RCycle =
-        countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
+        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
       if (RCycle > NextCycle)
         NextCycle = RCycle;
     }
+    if (SU->hasReservedResource) {
+      // For reserved resources, record the highest cycle using the resource.
+      // For top-down scheduling, this is the cycle in which we schedule this
+      // instruction plus the number of cycles the operations reserves the
+      // resource. For bottom-up is it simply the instruction's cycle.
+      for (TargetSchedModel::ProcResIter
+             PI = SchedModel->getWriteProcResBegin(SC),
+             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+        unsigned PIdx = PI->ProcResourceIdx;
+        if (SchedModel->getProcResource(PIdx)->BufferSize == 0)
+          ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
+      }
+    }
   }
   // Update ExpectedLatency and DependentLatency.
   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
@@ -2224,6 +2269,16 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
       > (int)LFactor;
   }
+  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
+  // one cycle. Since we commonly reach the max MOps here, opportunistically
+  // bump the cycle to avoid uselessly checking everything in the readyQ.
+  CurrMOps += IncMOps;
+  while (CurrMOps >= SchedModel->getIssueWidth()) {
+    bumpCycle(++NextCycle);
+    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
+          << " at cycle " << CurrCycle << '\n');
+  }

   DEBUG(dumpScheduledState());
 }


@@ -697,9 +697,15 @@ void ScheduleDAGInstrs::initSUnits() {
       for (TargetSchedModel::ProcResIter
              PI = SchedModel.getWriteProcResBegin(SC),
              PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
-        if (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize == 1) {
+        switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+        case 0:
+          SU->hasReservedResource = true;
+          break;
+        case 1:
           SU->isUnbuffered = true;
           break;
+        default:
+          break;
         }
       }
     }