misched: Infrastructure for weak DAG edges.

This adds support for weak DAG edges to the general scheduling
infrastructure in preparation for MachineScheduler support for
heuristics based on weak edges.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167738 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrew Trick 2012-11-12 19:28:57 +00:00
parent 95d8afc5f2
commit ae692f2bae
8 changed files with 142 additions and 66 deletions

View File

@ -57,7 +57,8 @@ namespace llvm {
Barrier, ///< An unknown scheduling barrier.
MayAliasMem, ///< Nonvolatile load/Store instructions that may alias.
MustAliasMem, ///< Nonvolatile load/Store instructions that must alias.
Artificial ///< Arbitrary weak DAG edge (no actual dependence).
Artificial, ///< Arbitrary weak DAG edge (no actual dependence).
Cluster ///< Weak DAG edge linking a chain of clustered instrs.
};
private:
@ -200,12 +201,27 @@ namespace llvm {
return getKind() == Order && Contents.OrdKind == MustAliasMem;
}
/// isWeak - Test if this a weak dependence. Weak dependencies are
/// considered DAG edges for height computation and other heuristics, but do
/// not force ordering. Breaking a weak edge may require the scheduler to
/// compensate, for example by inserting a copy.
bool isWeak() const {
return getKind() == Order
&& (Contents.OrdKind == Artificial || Contents.OrdKind == Cluster);
}
/// isArtificial - Test if this is an Order dependence that is marked
/// as "artificial", meaning it isn't necessary for correctness.
bool isArtificial() const {
return getKind() == Order && Contents.OrdKind == Artificial;
}
/// isCluster - Test if this is an Order dependence that is marked
/// as "cluster", meaning it is artificial and wants to be adjacent.
bool isCluster() const {
return getKind() == Order && Contents.OrdKind == Cluster;
}
/// isAssignedRegDep - Test if this is a Data dependence that is
/// associated with a register.
bool isAssignedRegDep() const {
@ -267,6 +283,8 @@ namespace llvm {
unsigned NumSuccs; // # of SDep::Data sucss.
unsigned NumPredsLeft; // # of preds not scheduled.
unsigned NumSuccsLeft; // # of succs not scheduled.
unsigned WeakPredsLeft; // # of weak preds not scheduled.
unsigned WeakSuccsLeft; // # of weak succs not scheduled.
unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use.
unsigned short Latency; // Node latency.
bool isVRegCycle : 1; // May use and def the same vreg.
@ -301,12 +319,12 @@ namespace llvm {
SUnit(SDNode *node, unsigned nodenum)
: Node(node), Instr(0), OrigNode(0), SchedClass(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
SchedulingPref(Sched::None),
NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
hasPhysRegClobbers(false), isPending(false), isAvailable(false),
isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@ -315,12 +333,12 @@ namespace llvm {
SUnit(MachineInstr *instr, unsigned nodenum)
: Node(0), Instr(instr), OrigNode(0), SchedClass(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
SchedulingPref(Sched::None),
NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
hasPhysRegClobbers(false), isPending(false), isAvailable(false),
isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@ -328,12 +346,12 @@ namespace llvm {
SUnit()
: Node(0), Instr(0), OrigNode(0), SchedClass(0), NodeNum(~0u),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
SchedulingPref(Sched::None),
NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
hasPhysRegClobbers(false), isPending(false), isAvailable(false),
isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@ -372,7 +390,7 @@ namespace llvm {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
bool addPred(const SDep &D);
bool addPred(const SDep &D, bool Required = true);
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
@ -654,6 +672,7 @@ namespace llvm {
class ScheduleDAGTopologicalSort {
/// SUnits - A reference to the ScheduleDAG's SUnits.
std::vector<SUnit> &SUnits;
SUnit *ExitSU;
/// Index2Node - Maps topological index to the node number.
std::vector<int> Index2Node;
@ -675,7 +694,7 @@ namespace llvm {
void Allocate(int n, int index);
public:
explicit ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits);
ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
/// InitDAGTopologicalSorting - create the initial topological
/// ordering from the DAG to be scheduled.

View File

@ -310,6 +310,10 @@ void ReadyQueue::dump() {
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
if (SuccEdge->isWeak()) {
--SuccSU->WeakPredsLeft;
return;
}
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@ -338,6 +342,10 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
if (PredEdge->isWeak()) {
--PredSU->WeakSuccsLeft;
return;
}
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@ -530,17 +538,20 @@ void ScheduleDAGMI::postprocessDAG() {
}
// Release all DAG roots for scheduling.
//
// Nodes with unreleased weak edges can still be roots.
void ScheduleDAGMI::releaseRoots() {
SmallVector<SUnit*, 16> BotRoots;
for (std::vector<SUnit>::iterator
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
SUnit *SU = &(*I);
// A SUnit is ready to top schedule if it has no predecessors.
if (I->Preds.empty())
SchedImpl->releaseTopNode(&(*I));
if (!I->NumPredsLeft && SU != &EntrySU)
SchedImpl->releaseTopNode(SU);
// A SUnit is ready to bottom schedule if it has no successors.
if (I->Succs.empty())
BotRoots.push_back(&(*I));
if (!I->NumSuccsLeft && SU != &ExitSU)
BotRoots.push_back(SU);
}
// Release bottom roots in reverse order so the higher priority nodes appear
// first. This is more natural and slightly more efficient.
@ -555,13 +566,12 @@ void ScheduleDAGMI::initQueues() {
// Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
// Release edges from the special Entry node or to the special Exit node.
// Release all DAG roots for scheduling, not including EntrySU/ExitSU.
releaseRoots();
releaseSuccessors(&EntrySU);
releasePredecessors(&ExitSU);
// Release all DAG roots for scheduling.
releaseRoots();
SchedImpl->registerRoots();
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);

View File

@ -111,9 +111,6 @@ namespace {
/// added to the AvailableQueue.
std::vector<SUnit*> PendingQueue;
/// Topo - A topological ordering for SUnits.
ScheduleDAGTopologicalSort Topo;
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
@ -198,7 +195,7 @@ SchedulePostRATDList::SchedulePostRATDList(
AliasAnalysis *AA, const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
: ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA),
: ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
LiveRegs(TRI->getNumRegs())
{
const TargetMachine &TM = MF.getTarget();
@ -580,10 +577,14 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
if (SuccEdge->isArtificial()) {
--SuccSU->WeakPredsLeft;
return;
}
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@ -653,8 +654,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
// Add all leaves to Available queue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
bool available = SUnits[i].Preds.empty();
if (available) {
if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
AvailableQueue.push(&SUnits[i]);
SUnits[i].isAvailable = true;
}

View File

@ -62,10 +62,14 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
bool SUnit::addPred(const SDep &D) {
bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this depenence, don't add a redundant one.
for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
// Zero-latency weak edges may be added purely for heuristic ordering. Don't
// add them if another kind of edge already exists.
if (!Required && I->getSUnit() == D.getSUnit())
return false;
if (I->overlaps(D)) {
// Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
if (I->getLatency() < D.getLatency()) {
@ -96,13 +100,26 @@ bool SUnit::addPred(const SDep &D) {
++NumPreds;
++N->NumSuccs;
}
// SD scheduler relies on artificial edges to enforce physreg
// antidependence, so it doesn't treat them as weak edges.
bool isWeak = D.isWeak() && N->isInstr();
if (!N->isScheduled) {
assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
++NumPredsLeft;
if (isWeak) {
++WeakPredsLeft;
}
else {
assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
++NumPredsLeft;
}
}
if (!isScheduled) {
assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
++N->NumSuccsLeft;
if (isWeak) {
++N->WeakSuccsLeft;
}
else {
assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
++N->NumSuccsLeft;
}
}
Preds.push_back(D);
N->Succs.push_back(P);
@ -143,13 +160,22 @@ void SUnit::removePred(const SDep &D) {
--NumPreds;
--N->NumSuccs;
}
bool isWeak = D.isWeak() && N->isInstr();
if (!N->isScheduled) {
assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
--NumPredsLeft;
if (isWeak)
--WeakPredsLeft;
else {
assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
--NumPredsLeft;
}
}
if (!isScheduled) {
assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
--N->NumSuccsLeft;
if (isWeak)
--N->WeakSuccsLeft;
else {
assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
--N->NumSuccsLeft;
}
}
if (P.getLatency() != 0) {
this->setDepthDirty();
@ -292,6 +318,10 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
if (WeakPredsLeft)
dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
if (WeakSuccsLeft)
dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
@ -429,6 +459,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
Node2Index.resize(DAGSize);
// Initialize the data structures.
if (ExitSU)
WorkList.push_back(ExitSU);
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &SUnits[i];
int NodeNum = SU->NodeNum;
@ -448,11 +480,12 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
Allocate(SU->NodeNum, --Id);
if (SU->NodeNum < DAGSize)
Allocate(SU->NodeNum, --Id);
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit *SU = I->getSUnit();
if (!--Node2Index[SU->NodeNum])
if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
// If all dependencies of the node are processed already,
// then the node can be computed now.
WorkList.push_back(SU);
@ -513,7 +546,10 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
WorkList.pop_back();
Visited.set(SU->NodeNum);
for (int I = SU->Succs.size()-1; I >= 0; --I) {
int s = SU->Succs[I].getSUnit()->NodeNum;
unsigned s = SU->Succs[I].getSUnit()->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (s >= Node2Index.size())
continue;
if (Node2Index[s] == UpperBound) {
HasLoop = true;
return;
@ -554,15 +590,16 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
/// create a cycle.
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
if (IsReachable(TargetSU, SU))
/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
// Is SU reachable from TargetSU via successor edges?
if (IsReachable(SU, TargetSU))
return true;
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I)
for (SUnit::pred_iterator
I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
if (I->isAssignedRegDep() &&
IsReachable(TargetSU, I->getSUnit()))
IsReachable(SU, I->getSUnit()))
return true;
return false;
}
@ -592,6 +629,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
}
ScheduleDAGTopologicalSort::
ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
: SUnits(sunits), ExitSU(exitsu) {}
ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}

View File

@ -245,21 +245,26 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (UseSU == SU)
continue;
SDep dep(SU, SDep::Data, *Alias);
// Adjust the dependence latency using operand def/use information,
// then allow the target to perform its own adjustments.
int UseOp = UseList[i].OpIdx;
MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
dep.setLatency(
MachineInstr *RegUse = 0;
SDep Dep;
if (UseOp < 0)
Dep = SDep(SU, SDep::Artificial);
else {
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
Dep.setMinLatency(
SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp, /*FindMin=*/true));
}
Dep.setLatency(
SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp, /*FindMin=*/false));
dep.setMinLatency(
SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp, /*FindMin=*/true));
ST.adjustSchedDependency(SU, UseSU, dep);
UseSU->addPred(dep);
ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
}
}

View File

@ -156,7 +156,7 @@ public:
CodeGenOpt::Level OptLevel)
: ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
Topo(SUnits) {
Topo(SUnits, NULL) {
const TargetMachine &tm = mf.getTarget();
if (DisableSchedCycles || !NeedLatency)

View File

@ -123,6 +123,8 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
llvm_unreachable(0);
}
#endif
assert(!D.isWeak() && "unexpected artificial DAG edge");
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());

View File

@ -13,6 +13,7 @@
; CHECK-NOT: ch SU
; CHECK: ch SU(2): Latency=1
; CHECK-NOT: ch SU
; CHECK: Successors:
; CHECK: ** List Scheduling
; CHECK: SU(2){{.*}}STR{{.*}}
; CHECK-NOT: ch SU
@ -22,6 +23,7 @@
; CHECK-NOT: ch SU
; CHECK: ch SU(2): Latency=1
; CHECK-NOT: ch SU
; CHECK: Successors:
define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
entry:
store volatile i32 65540, i32* %p1, align 4, !tbaa !0