Fix some register-alias-related bugs in the post-RA scheduler liveness

computation code. Also, avoid adding output-dependency edges when both
defs are dead, which frequently happens with EFLAGS defs.

Compute Depth and Height lazily, and always in terms of edge latency
values. For the schedulers that don't care about latency, edge latencies
are set to 1.

Eliminate Cycle and CycleBound, and LatencyPriorityQueue's Latencies array.
These are all subsumed by the Depth and Height fields.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61073 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman
2008-12-16 03:25:46 +00:00
parent 64722e5163
commit 3f23744df4
12 changed files with 357 additions and 299 deletions

View File

@@ -34,10 +34,6 @@ namespace llvm {
// SUnits - The SUnits for the current graph. // SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits; std::vector<SUnit> *SUnits;
// Latencies - The latency (max of latency from this node to the bb exit)
// for each node.
std::vector<int> Latencies;
/// NumNodesSolelyBlocking - This vector contains, for every node in the /// NumNodesSolelyBlocking - This vector contains, for every node in the
/// Queue, the number of nodes that the node is the sole unscheduled /// Queue, the number of nodes that the node is the sole unscheduled
/// predecessor for. This is used as a tie-breaker heuristic for better /// predecessor for. This is used as a tie-breaker heuristic for better
@@ -51,29 +47,23 @@ public:
void initNodes(std::vector<SUnit> &sunits) { void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits; SUnits = &sunits;
// Calculate node priorities. NumNodesSolelyBlocking.resize(SUnits->size(), 0);
CalculatePriorities();
} }
void addNode(const SUnit *SU) { void addNode(const SUnit *SU) {
Latencies.resize(SUnits->size(), -1);
NumNodesSolelyBlocking.resize(SUnits->size(), 0); NumNodesSolelyBlocking.resize(SUnits->size(), 0);
CalcLatency(*SU);
} }
void updateNode(const SUnit *SU) { void updateNode(const SUnit *SU) {
Latencies[SU->NodeNum] = -1;
CalcLatency(*SU);
} }
void releaseState() { void releaseState() {
SUnits = 0; SUnits = 0;
Latencies.clear();
} }
unsigned getLatency(unsigned NodeNum) const { unsigned getLatency(unsigned NodeNum) const {
assert(NodeNum < Latencies.size()); assert(NodeNum < (*SUnits).size());
return Latencies[NodeNum]; return (*SUnits)[NodeNum].getHeight();
} }
unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
@@ -114,8 +104,6 @@ public:
void ScheduledNode(SUnit *Node); void ScheduledNode(SUnit *Node);
private: private:
void CalculatePriorities();
void CalcLatency(const SUnit &SU);
void AdjustPriorityOfUnscheduledPreds(SUnit *SU); void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
SUnit *getSingleUnscheduledPred(SUnit *SU); SUnit *getSingleUnscheduledPred(SUnit *SU);
}; };

View File

@@ -242,10 +242,12 @@ namespace llvm {
bool isPending : 1; // True once pending. bool isPending : 1; // True once pending.
bool isAvailable : 1; // True once available. bool isAvailable : 1; // True once available.
bool isScheduled : 1; // True once scheduled. bool isScheduled : 1; // True once scheduled.
unsigned CycleBound; // Upper/lower cycle to be scheduled at. private:
unsigned Cycle; // Once scheduled, the cycle of the op. bool isDepthCurrent : 1; // True if Depth is current.
unsigned Depth; // Node depth; bool isHeightCurrent : 1; // True if Height is current.
unsigned Height; // Node height; unsigned Depth; // Node depth.
unsigned Height; // Node height.
public:
const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null. const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
const TargetRegisterClass *CopySrcRC; const TargetRegisterClass *CopySrcRC;
@@ -256,7 +258,8 @@ namespace llvm {
Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
isPending(false), isAvailable(false), isScheduled(false), isPending(false), isAvailable(false), isScheduled(false),
CycleBound(0), Cycle(~0u), Depth(0), Height(0), isDepthCurrent(false), isHeightCurrent(false),
Depth(0), Height(0),
CopyDstRC(NULL), CopySrcRC(NULL) {} CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct an SUnit for post-regalloc scheduling to represent /// SUnit - Construct an SUnit for post-regalloc scheduling to represent
@@ -266,7 +269,8 @@ namespace llvm {
Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
isPending(false), isAvailable(false), isScheduled(false), isPending(false), isAvailable(false), isScheduled(false),
CycleBound(0), Cycle(~0u), Depth(0), Height(0), isDepthCurrent(false), isHeightCurrent(false),
Depth(0), Height(0),
CopyDstRC(NULL), CopySrcRC(NULL) {} CopyDstRC(NULL), CopySrcRC(NULL) {}
/// setNode - Assign the representative SDNode for this SUnit. /// setNode - Assign the representative SDNode for this SUnit.
@@ -307,6 +311,41 @@ namespace llvm {
/// the specified node. /// the specified node.
void removePred(const SDep &D); void removePred(const SDep &D);
/// getDepth - Return the depth of this node, which is the length of the
/// maximum path up to any node which has no predecessors.
unsigned getDepth() const {
if (!isDepthCurrent) const_cast<SUnit *>(this)->ComputeDepth();
return Depth;
}
/// getHeight - Return the height of this node, which is the length of the
/// maximum path down to any node which has no successors.
unsigned getHeight() const {
if (!isHeightCurrent) const_cast<SUnit *>(this)->ComputeHeight();
return Height;
}
/// setDepthToAtLeast - If NewDepth is greater than this node's depth
/// value, set it to be the new depth value. This also recursively
/// marks successor nodes dirty.
void setDepthToAtLeast(unsigned NewDepth);
/// setHeightToAtLeast - If NewHeight is greater than this node's height
/// value, set it to be the new height value. This also recursively
/// marks predecessor nodes dirty.
void setHeightToAtLeast(unsigned NewHeight);
/// setDepthDirty - Set a flag in this node to indicate that its
/// stored Depth value will require recomputation the next time
/// getDepth() is called.
void setDepthDirty();
/// setHeightDirty - Set a flag in this node to indicate that its
/// stored Height value will require recomputation the next time
/// getHeight() is called.
void setHeightDirty();
/// isPred - Test if node N is a predecessor of this node.
bool isPred(SUnit *N) { bool isPred(SUnit *N) {
for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i)
if (Preds[i].getSUnit() == N) if (Preds[i].getSUnit() == N)
@@ -314,6 +353,7 @@ namespace llvm {
return false; return false;
} }
/// isSucc - Test if node N is a successor of this node.
bool isSucc(SUnit *N) { bool isSucc(SUnit *N) {
for (unsigned i = 0, e = (unsigned)Succs.size(); i != e; ++i) for (unsigned i = 0, e = (unsigned)Succs.size(); i != e; ++i)
if (Succs[i].getSUnit() == N) if (Succs[i].getSUnit() == N)
@@ -324,6 +364,10 @@ namespace llvm {
void dump(const ScheduleDAG *G) const; void dump(const ScheduleDAG *G) const;
void dumpAll(const ScheduleDAG *G) const; void dumpAll(const ScheduleDAG *G) const;
void print(raw_ostream &O, const ScheduleDAG *G) const; void print(raw_ostream &O, const ScheduleDAG *G) const;
private:
void ComputeDepth();
void ComputeHeight();
}; };
//===--------------------------------------------------------------------===// //===--------------------------------------------------------------------===//
@@ -397,12 +441,7 @@ namespace llvm {
/// ComputeLatency - Compute node latency. /// ComputeLatency - Compute node latency.
/// ///
virtual void ComputeLatency(SUnit *SU) { SU->Latency = 1; } virtual void ComputeLatency(SUnit *SU) = 0;
/// CalculateDepths, CalculateHeights - Calculate node depth / height.
///
void CalculateDepths();
void CalculateHeights();
protected: protected:
/// EmitNoop - Emit a noop instruction. /// EmitNoop - Emit a noop instruction.
@@ -440,6 +479,11 @@ namespace llvm {
void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap); void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
/// ForceUnitLatencies - Return true if all scheduling edges should be given a
/// latency value of one. The default is to return false; schedulers may
/// override this as needed.
virtual bool ForceUnitLatencies() const { return false; }
private: private:
/// EmitLiveInCopy - Emit a copy for a live in physical register. If the /// EmitLiveInCopy - Emit a copy for a live in physical register. If the
/// physical register has only a single copy use, then coalesced the copy /// physical register has only a single copy use, then coalesced the copy

View File

@@ -18,10 +18,18 @@
#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAG.h"
namespace llvm { namespace llvm {
class MachineLoopInfo;
class MachineDominatorTree;
class ScheduleDAGInstrs : public ScheduleDAG { class ScheduleDAGInstrs : public ScheduleDAG {
const MachineLoopInfo &MLI;
const MachineDominatorTree &MDT;
public: public:
ScheduleDAGInstrs(MachineBasicBlock *bb, ScheduleDAGInstrs(MachineBasicBlock *bb,
const TargetMachine &tm); const TargetMachine &tm,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt);
virtual ~ScheduleDAGInstrs() {} virtual ~ScheduleDAGInstrs() {}

View File

@@ -41,47 +41,6 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
} }
/// CalcNodePriority - Calculate the maximal path from the node to the exit.
///
void LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
int &Latency = Latencies[SU.NodeNum];
if (Latency != -1)
return;
std::vector<const SUnit*> WorkList;
WorkList.push_back(&SU);
while (!WorkList.empty()) {
const SUnit *Cur = WorkList.back();
bool AllDone = true;
unsigned MaxSuccLatency = 0;
for (SUnit::const_succ_iterator I = Cur->Succs.begin(),E = Cur->Succs.end();
I != E; ++I) {
int SuccLatency = Latencies[I->getSUnit()->NodeNum];
if (SuccLatency == -1) {
AllDone = false;
WorkList.push_back(I->getSUnit());
} else {
unsigned NewLatency = SuccLatency + I->getLatency();
MaxSuccLatency = std::max(MaxSuccLatency, NewLatency);
}
}
if (AllDone) {
Latencies[Cur->NodeNum] = MaxSuccLatency;
WorkList.pop_back();
}
}
}
/// CalculatePriorities - Calculate priorities of all scheduling units.
void LatencyPriorityQueue::CalculatePriorities() {
Latencies.assign(SUnits->size(), -1);
NumNodesSolelyBlocking.assign(SUnits->size(), 0);
// For each node, calculate the maximal path from the node to the exit.
for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
CalcLatency((*SUnits)[i]);
}
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null. /// of SU, return it, otherwise return null.
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {

View File

@@ -23,7 +23,9 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h"
@@ -49,6 +51,14 @@ namespace {
static char ID; static char ID;
PostRAScheduler() : MachineFunctionPass(&ID) {} PostRAScheduler() : MachineFunctionPass(&ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
const char *getPassName() const { const char *getPassName() const {
return "Post RA top-down list latency scheduler"; return "Post RA top-down list latency scheduler";
} }
@@ -72,8 +82,10 @@ namespace {
ScheduleDAGTopologicalSort Topo; ScheduleDAGTopologicalSort Topo;
public: public:
SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm) SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm,
: ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {} const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT)
: ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {}
void Schedule(); void Schedule();
@@ -88,11 +100,14 @@ namespace {
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
DOUT << "PostRAScheduler\n"; DOUT << "PostRAScheduler\n";
const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
// Loop over all of the basic blocks // Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) { MBB != MBBe; ++MBB) {
SchedulePostRATDList Scheduler(MBB, Fn.getTarget()); SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT);
Scheduler.Run(); Scheduler.Run();
@@ -142,6 +157,28 @@ getInstrOperandRegClass(const TargetRegisterInfo *TRI,
return TRI->getRegClass(II.OpInfo[Op].RegClass); return TRI->getRegClass(II.OpInfo[Op].RegClass);
} }
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
SDep *Next = 0;
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
SUnit *PredSU = P->getSUnit();
unsigned PredLatency = P->getLatency();
unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
// In the case of a latency tie, prefer an anti-dependency edge over
// other types of edges.
if (NextDepth < PredTotalLatency ||
(NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
NextDepth = PredTotalLatency;
Next = &*P;
}
}
return Next;
}
/// BreakAntiDependencies - Identify anti-dependencies along the critical path /// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers. /// of the ScheduleDAG and break them by renaming registers.
/// ///
@@ -150,34 +187,16 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
// so just duck out immediately if the block is empty. // so just duck out immediately if the block is empty.
if (BB->empty()) return false; if (BB->empty()) return false;
Topo.InitDAGTopologicalSorting(); // Find the node at the bottom of the critical path.
// Compute a critical path for the DAG.
SUnit *Max = 0; SUnit *Max = 0;
std::vector<SDep *> CriticalPath(SUnits.size()); for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), SUnit *SU = &SUnits[i];
E = Topo.end(); I != E; ++I) { if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
SUnit *SU = &SUnits[*I];
for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
SUnit *PredSU = P->getSUnit();
// This assumes that there's no delay for reusing registers.
unsigned PredLatency = P->getLatency();
unsigned PredTotalLatency = PredSU->CycleBound + PredLatency;
if (SU->CycleBound < PredTotalLatency ||
(SU->CycleBound == PredTotalLatency &&
P->getKind() == SDep::Anti)) {
SU->CycleBound = PredTotalLatency;
CriticalPath[*I] = &*P;
}
}
// Keep track of the node at the end of the critical path.
if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency)
Max = SU; Max = SU;
} }
DOUT << "Critical path has total latency " DOUT << "Critical path has total latency "
<< (Max ? Max->CycleBound + Max->Latency : 0) << "\n"; << (Max ? Max->getDepth() + Max->Latency : 0) << "\n";
// Walk the critical path from the bottom up. Collect all anti-dependence // Walk the critical path from the bottom up. Collect all anti-dependence
// edges on the critical path. Skip anti-dependencies between SUnits that // edges on the critical path. Skip anti-dependencies between SUnits that
@@ -195,9 +214,9 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
// the anti-dependencies in an instruction in order to be effective. // the anti-dependencies in an instruction in order to be effective.
BitVector AllocatableSet = TRI->getAllocatableSet(*MF); BitVector AllocatableSet = TRI->getAllocatableSet(*MF);
DenseMap<MachineInstr *, unsigned> CriticalAntiDeps; DenseMap<MachineInstr *, unsigned> CriticalAntiDeps;
for (SUnit *SU = Max; CriticalPath[SU->NodeNum]; SUnit *SU = Max;
SU = CriticalPath[SU->NodeNum]->getSUnit()) { for (SDep *Edge = CriticalPathStep(SU); Edge;
SDep *Edge = CriticalPath[SU->NodeNum]; Edge = CriticalPathStep(SU = Edge->getSUnit())) {
SUnit *NextSU = Edge->getSUnit(); SUnit *NextSU = Edge->getSUnit();
// Only consider anti-dependence edges. // Only consider anti-dependence edges.
if (Edge->getKind() != SDep::Anti) if (Edge->getKind() != SDep::Anti)
@@ -494,6 +513,11 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
Classes[SubregReg] = 0; Classes[SubregReg] = 0;
RegRefs.erase(SubregReg); RegRefs.erase(SubregReg);
} }
for (const unsigned *Super = TRI->getSuperRegisters(Reg);
*Super; ++Super) {
unsigned SuperReg = *Super;
Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
}
} }
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i); MachineOperand &MO = MI->getOperand(i);
@@ -556,8 +580,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
// Compute how many cycles it will be before this actually becomes // Compute how many cycles it will be before this actually becomes
// available. This is the max of the start time of all predecessors plus // available. This is the max of the start time of all predecessors plus
// their latencies. // their latencies.
unsigned PredDoneCycle = SU->Cycle + SuccEdge->getLatency(); SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
if (SuccSU->NumPredsLeft == 0) { if (SuccSU->NumPredsLeft == 0) {
PendingQueue.push_back(SuccSU); PendingQueue.push_back(SuccSU);
@@ -572,7 +595,8 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(SU->dump(this)); DEBUG(SU->dump(this));
Sequence.push_back(SU); Sequence.push_back(SU);
SU->Cycle = CurCycle; assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
// Top down: release successors. // Top down: release successors.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -603,21 +627,21 @@ void SchedulePostRATDList::ListScheduleTopDown() {
while (!AvailableQueue.empty() || !PendingQueue.empty()) { while (!AvailableQueue.empty() || !PendingQueue.empty()) {
// Check to see if any of the pending instructions are ready to issue. If // Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue. // so, add them to the available queue.
unsigned MinDepth = ~0u;
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
if (PendingQueue[i]->CycleBound == CurCycle) { if (PendingQueue[i]->getDepth() <= CurCycle) {
AvailableQueue.push(PendingQueue[i]); AvailableQueue.push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true; PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back(); PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back(); PendingQueue.pop_back();
--i; --e; --i; --e;
} else { } else if (PendingQueue[i]->getDepth() < MinDepth)
assert(PendingQueue[i]->CycleBound > CurCycle && "Non-positive latency?"); MinDepth = PendingQueue[i]->getDepth();
}
} }
// If there are no instructions available, don't try to issue anything. // If there are no instructions available, don't try to issue anything.
if (AvailableQueue.empty()) { if (AvailableQueue.empty()) {
++CurCycle; CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
continue; continue;
} }

View File

@@ -33,115 +33,6 @@ ScheduleDAG::ScheduleDAG(SelectionDAG *dag, MachineBasicBlock *bb,
ScheduleDAG::~ScheduleDAG() {} ScheduleDAG::~ScheduleDAG() {}
/// CalculateDepths - compute depths using algorithms for the longest
/// paths in the DAG
void ScheduleDAG::CalculateDepths() {
unsigned DAGSize = SUnits.size();
std::vector<SUnit*> WorkList;
WorkList.reserve(DAGSize);
// Initialize the data structures
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &SUnits[i];
unsigned Degree = SU->Preds.size();
// Temporarily use the Depth field as scratch space for the degree count.
SU->Depth = Degree;
// Is it a node without dependencies?
if (Degree == 0) {
assert(SU->Preds.empty() && "SUnit should have no predecessors");
// Collect leaf nodes
WorkList.push_back(SU);
}
}
// Process nodes in the topological order
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
unsigned SUDepth = 0;
// Use dynamic programming:
// When current node is being processed, all of its dependencies
// are already processed.
// So, just iterate over all predecessors and take the longest path
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
unsigned PredDepth = I->getSUnit()->Depth;
if (PredDepth+1 > SUDepth) {
SUDepth = PredDepth + 1;
}
}
SU->Depth = SUDepth;
// Update degrees of all nodes depending on current SUnit
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
SUnit *SU = I->getSUnit();
if (!--SU->Depth)
// If all dependencies of the node are processed already,
// then the longest path for the node can be computed now
WorkList.push_back(SU);
}
}
}
/// CalculateHeights - compute heights using algorithms for the longest
/// paths in the DAG
void ScheduleDAG::CalculateHeights() {
unsigned DAGSize = SUnits.size();
std::vector<SUnit*> WorkList;
WorkList.reserve(DAGSize);
// Initialize the data structures
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &SUnits[i];
unsigned Degree = SU->Succs.size();
// Temporarily use the Height field as scratch space for the degree count.
SU->Height = Degree;
// Is it a node without dependencies?
if (Degree == 0) {
assert(SU->Succs.empty() && "Something wrong");
assert(WorkList.empty() && "Should be empty");
// Collect leaf nodes
WorkList.push_back(SU);
}
}
// Process nodes in the topological order
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
unsigned SUHeight = 0;
// Use dynamic programming:
// When current node is being processed, all of its dependencies
// are already processed.
// So, just iterate over all successors and take the longest path
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
unsigned SuccHeight = I->getSUnit()->Height;
if (SuccHeight+1 > SUHeight) {
SUHeight = SuccHeight + 1;
}
}
SU->Height = SUHeight;
// Update degrees of all nodes depending on current SUnit
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit *SU = I->getSUnit();
if (!--SU->Height)
// If all dependencies of the node are processed already,
// then the longest path for the node can be computed now
WorkList.push_back(SU);
}
}
}
/// dump - dump the schedule. /// dump - dump the schedule.
void ScheduleDAG::dumpSchedule() const { void ScheduleDAG::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) { for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -171,13 +62,10 @@ void SUnit::addPred(const SDep &D) {
for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i)
if (Preds[i] == D) if (Preds[i] == D)
return; return;
// Add a pred to this SUnit.
Preds.push_back(D);
// Now add a corresponding succ to N. // Now add a corresponding succ to N.
SDep P = D; SDep P = D;
P.setSUnit(this); P.setSUnit(this);
SUnit *N = D.getSUnit(); SUnit *N = D.getSUnit();
N->Succs.push_back(P);
// Update the bookkeeping. // Update the bookkeeping.
if (D.getKind() == SDep::Data) { if (D.getKind() == SDep::Data) {
++NumPreds; ++NumPreds;
@@ -187,6 +75,10 @@ void SUnit::addPred(const SDep &D) {
++NumPredsLeft; ++NumPredsLeft;
if (!isScheduled) if (!isScheduled)
++N->NumSuccsLeft; ++N->NumSuccsLeft;
N->Succs.push_back(P);
Preds.push_back(D);
this->setDepthDirty();
N->setHeightDirty();
} }
/// removePred - This removes the specified edge as a pred of the current /// removePred - This removes the specified edge as a pred of the current
@@ -220,10 +112,128 @@ void SUnit::removePred(const SDep &D) {
--NumPredsLeft; --NumPredsLeft;
if (!isScheduled) if (!isScheduled)
--N->NumSuccsLeft; --N->NumSuccsLeft;
this->setDepthDirty();
N->setHeightDirty();
return; return;
} }
} }
void SUnit::setDepthDirty() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
if (!SU->isDepthCurrent) continue;
SU->isDepthCurrent = false;
for (SUnit::const_succ_iterator I = Succs.begin(),
E = Succs.end(); I != E; ++I)
WorkList.push_back(I->getSUnit());
}
}
void SUnit::setHeightDirty() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
if (!SU->isHeightCurrent) continue;
SU->isHeightCurrent = false;
for (SUnit::const_pred_iterator I = Preds.begin(),
E = Preds.end(); I != E; ++I)
WorkList.push_back(I->getSUnit());
}
}
/// setDepthToAtLeast - Update this node's successors to reflect the
/// fact that this node's depth just increased.
///
void SUnit::setDepthToAtLeast(unsigned NewDepth) {
if (NewDepth <= Depth)
return;
setDepthDirty();
Depth = NewDepth;
isDepthCurrent = true;
}
/// setHeightToAtLeast - Update this node's predecessors to reflect the
/// fact that this node's height just increased.
///
void SUnit::setHeightToAtLeast(unsigned NewHeight) {
if (NewHeight <= Height)
return;
setHeightDirty();
Height = NewHeight;
isHeightCurrent = true;
}
/// ComputeDepth - Calculate the maximal path from the node to the entry.
///
void SUnit::ComputeDepth() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
while (!WorkList.empty()) {
SUnit *Cur = WorkList.back();
bool Done = true;
unsigned MaxPredDepth = 0;
for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
E = Cur->Preds.end(); I != E; ++I) {
SUnit *PredSU = I->getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
PredSU->Depth + I->getLatency());
else {
Done = false;
WorkList.push_back(PredSU);
}
}
if (Done) {
WorkList.pop_back();
if (MaxPredDepth != Cur->Depth) {
Cur->setDepthDirty();
Cur->Depth = MaxPredDepth;
}
Cur->isDepthCurrent = true;
}
}
}
/// ComputeHeight - Calculate the maximal path from the node to the exit.
///
void SUnit::ComputeHeight() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
while (!WorkList.empty()) {
SUnit *Cur = WorkList.back();
bool Done = true;
unsigned MaxSuccHeight = 0;
for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
E = Cur->Succs.end(); I != E; ++I) {
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
SuccSU->Height + I->getLatency());
else {
Done = false;
WorkList.push_back(SuccSU);
}
}
if (Done) {
WorkList.pop_back();
if (MaxSuccHeight != Cur->Height) {
Cur->setHeightDirty();
Cur->Height = MaxSuccHeight;
}
Cur->isHeightCurrent = true;
}
}
}
/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
/// a group of nodes flagged together. /// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const { void SUnit::dump(const ScheduleDAG *G) const {
@@ -299,11 +309,14 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
cerr << "has not been scheduled!\n"; cerr << "has not been scheduled!\n";
AnyNotSched = true; AnyNotSched = true;
} }
if (SUnits[i].isScheduled && SUnits[i].Cycle > (unsigned)INT_MAX) { if (SUnits[i].isScheduled &&
(isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) >
unsigned(INT_MAX)) {
if (!AnyNotSched) if (!AnyNotSched)
cerr << "*** Scheduling failed! ***\n"; cerr << "*** Scheduling failed! ***\n";
SUnits[i].dump(this); SUnits[i].dump(this);
cerr << "has an unexpected Cycle value!\n"; cerr << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true; AnyNotSched = true;
} }
if (isBottomUp) { if (isBottomUp) {

View File

@@ -13,19 +13,27 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs" #define DEBUG_TYPE "sched-instrs"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include <map> #include <map>
using namespace llvm; using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb, ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb,
const TargetMachine &tm) const TargetMachine &tm,
: ScheduleDAG(0, bb, tm) {} const MachineLoopInfo &mli,
const MachineDominatorTree &mdt)
: ScheduleDAG(0, bb, tm), MLI(mli), MDT(mdt) {}
void ScheduleDAGInstrs::BuildSchedUnits() { void ScheduleDAGInstrs::BuildSchedUnits() {
SUnits.clear(); SUnits.clear();
@@ -35,7 +43,7 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
// to top. // to top.
// Remember where defs and uses of each physical register are as we procede. // Remember where defs and uses of each physical register are as we procede.
SUnit *Defs[TargetRegisterInfo::FirstVirtualRegister] = {}; std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {}; std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {};
// Remember where unknown loads are after the most recent unknown store // Remember where unknown loads are after the most recent unknown store
@@ -57,12 +65,19 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
// all the work of the block is done before the terminator. // all the work of the block is done before the terminator.
SUnit *Terminator = 0; SUnit *Terminator = 0;
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin(); for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
MII != MIE; --MII) { MII != MIE; --MII) {
MachineInstr *MI = prior(MII); MachineInstr *MI = prior(MII);
const TargetInstrDesc &TID = MI->getDesc();
SUnit *SU = NewSUnit(MI); SUnit *SU = NewSUnit(MI);
// Assign the Latency field of SU using target-provided information. // Assign the Latency field of SU using target-provided information.
if (UnitLatencies)
SU->Latency = 1;
else
ComputeLatency(SU); ComputeLatency(SU);
// Add register-based dependencies (data, anti, and output). // Add register-based dependencies (data, anti, and output).
@@ -74,33 +89,51 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
std::vector<SUnit *> &UseList = Uses[Reg]; std::vector<SUnit *> &UseList = Uses[Reg];
SUnit *&Def = Defs[Reg]; std::vector<SUnit *> &DefList = Defs[Reg];
// Optionally add output and anti dependencies. // Optionally add output and anti dependencies.
// TODO: Using a latency of 1 here assumes there's no cost for // TODO: Using a latency of 1 here assumes there's no cost for
// reusing registers. // reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
if (Def && Def != SU) for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); SUnit *DefSU = DefList[i];
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
SUnit *&Def = Defs[*Alias]; std::vector<SUnit *> &DefList = Defs[*Alias];
if (Def && Def != SU) for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); SUnit *DefSU = DefList[i];
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
}
} }
if (MO.isDef()) { if (MO.isDef()) {
// Add any data dependencies. // Add any data dependencies.
for (unsigned i = 0, e = UseList.size(); i != e; ++i) unsigned DataLatency = SU->Latency;
if (UseList[i] != SU) for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, Reg)); SUnit *UseSU = UseList[i];
if (UseSU != SU) {
UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg));
}
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &UseList = Uses[*Alias]; std::vector<SUnit *> &UseList = Uses[*Alias];
for (unsigned i = 0, e = UseList.size(); i != e; ++i) for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
if (UseList[i] != SU) SUnit *UseSU = UseList[i];
UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, *Alias)); if (UseSU != SU)
UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
}
} }
UseList.clear(); UseList.clear();
Def = SU; if (!MO.isDead())
DefList.clear();
DefList.push_back(SU);
} else { } else {
UseList.push_back(SU); UseList.push_back(SU);
} }
@@ -111,7 +144,6 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
// after stack slots are lowered to actual addresses. // after stack slots are lowered to actual addresses.
// TODO: Use an AliasAnalysis and do real alias-analysis queries, and // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
// produce more precise dependence information. // produce more precise dependence information.
const TargetInstrDesc &TID = MI->getDesc();
if (TID.isCall() || TID.isReturn() || TID.isBranch() || if (TID.isCall() || TID.isReturn() || TID.isBranch() ||
TID.hasUnmodeledSideEffects()) { TID.hasUnmodeledSideEffects()) {
new_chain: new_chain:

View File

@@ -99,6 +99,9 @@ private:
SmallVector<SUnit*, 2>&); SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp(); void ListScheduleBottomUp();
/// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
bool ForceUnitLatencies() const { return true; }
}; };
} // end anonymous namespace } // end anonymous namespace
@@ -153,7 +156,8 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DOUT << "*** Scheduling [" << CurCycle << "]: "; DOUT << "*** Scheduling [" << CurCycle << "]: ";
DEBUG(SU->dump(this)); DEBUG(SU->dump(this));
SU->Cycle = CurCycle; assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU); Sequence.push_back(SU);
// Bottom up: release predecessors // Bottom up: release predecessors
@@ -177,7 +181,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) { I != E; ++I) {
if (I->isAssignedRegDep()) { if (I->isAssignedRegDep()) {
if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) { if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == SU && assert(LiveRegDefs[I->getReg()] == SU &&
"Physical register dependency violated?"); "Physical register dependency violated?");
@@ -247,9 +251,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
} }
if (TID.isCommutable()) if (TID.isCommutable())
NewSU->isCommutable = true; NewSU->isCommutable = true;
// FIXME: Calculate height / depth and propagate the changes?
NewSU->Depth = SU->Depth;
NewSU->Height = SU->Height;
// LoadNode may already exist. This can happen when there is another // LoadNode may already exist. This can happen when there is another
// load from the same location and producing the same type of value // load from the same location and producing the same type of value
@@ -262,9 +263,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
} else { } else {
LoadSU = NewSUnit(LoadNode); LoadSU = NewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum); LoadNode->setNodeId(LoadSU->NodeNum);
LoadSU->Depth = SU->Depth;
LoadSU->Height = SU->Height;
} }
SDep ChainPred; SDep ChainPred;
@@ -344,10 +342,8 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
// New SUnit has the exact same predecessors. // New SUnit has the exact same predecessors.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) I != E; ++I)
if (!I->isArtificial()) { if (!I->isArtificial())
AddPred(NewSU, *I); AddPred(NewSU, *I);
NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1);
}
// Only copy scheduled successors. Cut them from old node's successor // Only copy scheduled successors. Cut them from old node's successor
// list and move them over. // list and move them over.
@@ -358,7 +354,6 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
continue; continue;
SUnit *SuccSU = I->getSUnit(); SUnit *SuccSU = I->getSUnit();
if (SuccSU->isScheduled) { if (SuccSU->isScheduled) {
NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1);
SDep D = *I; SDep D = *I;
D.setSUnit(NewSU); D.setSUnit(NewSU);
AddPred(SuccSU, D); AddPred(SuccSU, D);

View File

@@ -120,10 +120,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
} }
#endif #endif
// Compute the cycle when this SUnit actually becomes available. This SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
// is the max of the start time of all predecessors plus their latencies.
unsigned PredDoneCycle = SU->Cycle + SU->Latency;
SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
if (SuccSU->NumPredsLeft == 0) { if (SuccSU->NumPredsLeft == 0) {
PendingQueue.push_back(SuccSU); PendingQueue.push_back(SuccSU);
@@ -138,7 +135,8 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(SU->dump(this)); DEBUG(SU->dump(this));
Sequence.push_back(SU); Sequence.push_back(SU);
SU->Cycle = CurCycle; assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
// Top down: release successors. // Top down: release successors.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -171,14 +169,14 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Check to see if any of the pending instructions are ready to issue. If // Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue. // so, add them to the available queue.
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
if (PendingQueue[i]->CycleBound == CurCycle) { if (PendingQueue[i]->getDepth() == CurCycle) {
AvailableQueue->push(PendingQueue[i]); AvailableQueue->push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true; PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back(); PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back(); PendingQueue.pop_back();
--i; --e; --i; --e;
} else { } else {
assert(PendingQueue[i]->CycleBound > CurCycle && "Negative latency?"); assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
} }
} }

View File

@@ -154,6 +154,10 @@ private:
Topo.InitDAGTopologicalSorting(); Topo.InitDAGTopologicalSorting();
return NewNode; return NewNode;
} }
/// ForceUnitLatencies - Return true, since register-pressure-reducing
/// scheduling doesn't need actual latency information.
bool ForceUnitLatencies() const { return true; }
}; };
} // end anonymous namespace } // end anonymous namespace
@@ -171,8 +175,6 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this)); SUnits[su].dumpAll(this));
CalculateDepths();
CalculateHeights();
Topo.InitDAGTopologicalSorting(); Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits); AvailableQueue->initNodes(SUnits);
@@ -272,7 +274,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DOUT << "*** Scheduling [" << CurCycle << "]: "; DOUT << "*** Scheduling [" << CurCycle << "]: ";
DEBUG(SU->dump(this)); DEBUG(SU->dump(this));
SU->Cycle = CurCycle; assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU); Sequence.push_back(SU);
// Bottom up: release predecessors // Bottom up: release predecessors
@@ -296,7 +299,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) { I != E; ++I) {
if (I->isAssignedRegDep()) { if (I->isAssignedRegDep()) {
if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) { if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == SU && assert(LiveRegDefs[I->getReg()] == SU &&
"Physical register dependency violated?"); "Physical register dependency violated?");
@@ -328,7 +331,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change. /// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
DEBUG(SU->dump(this)); DEBUG(SU->dump(this));
AvailableQueue->UnscheduledNode(SU); AvailableQueue->UnscheduledNode(SU);
@@ -336,7 +339,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) { I != E; ++I) {
CapturePred(&*I); CapturePred(&*I);
if (I->isAssignedRegDep() && SU->Cycle == LiveRegCycles[I->getReg()]) { if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() && assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?"); "Physical register dependency violated?");
@@ -353,12 +356,12 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
LiveRegDefs[I->getReg()] = SU; LiveRegDefs[I->getReg()] = SU;
++NumLiveRegs; ++NumLiveRegs;
} }
if (I->getSUnit()->Cycle < LiveRegCycles[I->getReg()]) if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
LiveRegCycles[I->getReg()] = I->getSUnit()->Cycle; LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
} }
} }
SU->Cycle = 0; SU->setHeightDirty();
SU->isScheduled = false; SU->isScheduled = false;
SU->isAvailable = true; SU->isAvailable = true;
AvailableQueue->push(SU); AvailableQueue->push(SU);
@@ -443,9 +446,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} else { } else {
LoadSU = CreateNewSUnit(LoadNode); LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum); LoadNode->setNodeId(LoadSU->NodeNum);
LoadSU->Depth = SU->Depth;
LoadSU->Height = SU->Height;
ComputeLatency(LoadSU); ComputeLatency(LoadSU);
} }
@@ -462,9 +462,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} }
if (TID.isCommutable()) if (TID.isCommutable())
NewSU->isCommutable = true; NewSU->isCommutable = true;
// FIXME: Calculate height / depth and propagate the changes?
NewSU->Depth = SU->Depth;
NewSU->Height = SU->Height;
ComputeLatency(NewSU); ComputeLatency(NewSU);
SDep ChainPred; SDep ChainPred;
@@ -548,10 +545,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
// New SUnit has the exact same predecessors. // New SUnit has the exact same predecessors.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) I != E; ++I)
if (!I->isArtificial()) { if (!I->isArtificial())
AddPred(NewSU, *I); AddPred(NewSU, *I);
NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1);
}
// Only copy scheduled successors. Cut them from old node's successor // Only copy scheduled successors. Cut them from old node's successor
// list and move them over. // list and move them over.
@@ -562,7 +557,6 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
continue; continue;
SUnit *SuccSU = I->getSUnit(); SUnit *SuccSU = I->getSUnit();
if (SuccSU->isScheduled) { if (SuccSU->isScheduled) {
NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1);
SDep D = *I; SDep D = *I;
D.setSUnit(NewSU); D.setSUnit(NewSU);
AddPred(SuccSU, D); AddPred(SuccSU, D);
@@ -570,9 +564,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
DelDeps.push_back(std::make_pair(SuccSU, D)); DelDeps.push_back(std::make_pair(SuccSU, D));
} }
} }
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second); RemovePred(DelDeps[i].first, DelDeps[i].second);
}
AvailableQueue->updateNode(SU); AvailableQueue->updateNode(SU);
AvailableQueue->addNode(NewSU); AvailableQueue->addNode(NewSU);
@@ -590,8 +583,6 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
SUnit *CopyFromSU = CreateNewSUnit(NULL); SUnit *CopyFromSU = CreateNewSUnit(NULL);
CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC; CopyFromSU->CopyDstRC = DestRC;
CopyFromSU->Depth = SU->Depth;
CopyFromSU->Height = SU->Height;
SUnit *CopyToSU = CreateNewSUnit(NULL); SUnit *CopyToSU = CreateNewSUnit(NULL);
CopyToSU->CopySrcRC = DestRC; CopyToSU->CopySrcRC = DestRC;
@@ -870,7 +861,8 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DOUT << "*** Scheduling [" << CurCycle << "]: "; DOUT << "*** Scheduling [" << CurCycle << "]: ";
DEBUG(SU->dump(this)); DEBUG(SU->dump(this));
SU->Cycle = CurCycle; assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
Sequence.push_back(SU); Sequence.push_back(SU);
// Top down: release successors // Top down: release successors
@@ -1107,19 +1099,19 @@ namespace {
/// closestSucc - Returns the scheduled cycle of the successor which is /// closestSucc - Returns the scheduled cycle of the successor which is
/// closet to the current cycle. /// closet to the current cycle.
static unsigned closestSucc(const SUnit *SU) { static unsigned closestSucc(const SUnit *SU) {
unsigned MaxCycle = 0; unsigned MaxHeight = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) { I != E; ++I) {
unsigned Cycle = I->getSUnit()->Cycle; unsigned Height = I->getSUnit()->getHeight();
// If there are bunch of CopyToRegs stacked up, they should be considered // If there are bunch of CopyToRegs stacked up, they should be considered
// to be at the same position. // to be at the same position.
if (I->getSUnit()->getNode() && if (I->getSUnit()->getNode() &&
I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
Cycle = closestSucc(I->getSUnit())+1; Height = closestSucc(I->getSUnit())+1;
if (Cycle > MaxCycle) if (Height > MaxHeight)
MaxCycle = Cycle; MaxHeight = Height;
} }
return MaxCycle; return MaxHeight;
} }
/// calcMaxScratches - Returns an cost estimate of the worse case requirement /// calcMaxScratches - Returns an cost estimate of the worse case requirement
@@ -1182,11 +1174,11 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
if (LScratch != RScratch) if (LScratch != RScratch)
return LScratch > RScratch; return LScratch > RScratch;
if (left->Height != right->Height) if (left->getHeight() != right->getHeight())
return left->Height > right->Height; return left->getHeight() > right->getHeight();
if (left->Depth != right->Depth) if (left->getDepth() != right->getDepth())
return left->Depth < right->Depth; return left->getDepth() < right->getDepth();
assert(left->NodeQueueId && right->NodeQueueId && assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero"); "NodeQueueId cannot be zero");
@@ -1294,7 +1286,8 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
continue; continue;
// Be conservative. Ignore if nodes aren't at roughly the same // Be conservative. Ignore if nodes aren't at roughly the same
// depth and height. // depth and height.
if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) if (SuccSU->getHeight() < SU->getHeight() &&
(SU->getHeight() - SuccSU->getHeight()) > 1)
continue; continue;
if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
continue; continue;
@@ -1384,8 +1377,8 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
if (LPriority+LBonus != RPriority+RBonus) if (LPriority+LBonus != RPriority+RBonus)
return LPriority+LBonus < RPriority+RBonus; return LPriority+LBonus < RPriority+RBonus;
if (left->Depth != right->Depth) if (left->getDepth() != right->getDepth())
return left->Depth < right->Depth; return left->getDepth() < right->getDepth();
if (left->NumSuccsLeft != right->NumSuccsLeft) if (left->NumSuccsLeft != right->NumSuccsLeft)
return left->NumSuccsLeft > right->NumSuccsLeft; return left->NumSuccsLeft > right->NumSuccsLeft;

View File

@@ -80,6 +80,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
E = DAG->allnodes_end(); NI != E; ++NI) E = DAG->allnodes_end(); NI != E; ++NI)
NI->setNodeId(-1); NI->setNodeId(-1);
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
E = DAG->allnodes_end(); NI != E; ++NI) { E = DAG->allnodes_end(); NI != E; ++NI) {
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
@@ -133,6 +136,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
N->setNodeId(NodeSUnit->NodeNum); N->setNodeId(NodeSUnit->NodeNum);
// Assign the Latency field of NodeSUnit using target-provided information. // Assign the Latency field of NodeSUnit using target-provided information.
if (UnitLatencies)
NodeSUnit->Latency = 1;
else
ComputeLatency(NodeSUnit); ComputeLatency(NodeSUnit);
} }

View File

@@ -1,10 +1,8 @@
; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep pcmpeqd ; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep pcmpeqd | count 1
; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1 ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; On x86-64, this testcase shouldn't need to spill the -1 value, ; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector. ; so it should just use pcmpeqd to materialize an all-ones vector.
; On x86-32, there aren't enough registers, so an all-ones
; constant pool should be created so it can be folded.
%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
%struct._cl_image_format_t = type <{ i32, i32, i32 }> %struct._cl_image_format_t = type <{ i32, i32, i32 }>