Introducing a new method of tracking register pressure. We can't

precisely track pressure on a selection DAG, but we can at least keep
it balanced. This design accounts for various interesting aspects of
selection DAGS: register and subregister copies, glued nodes, dead
nodes, unused registers, etc.

Added SUnit::NumRegDefsLeft and ScheduleDAGSDNodes::RegDefIter.

Note: I disabled PrescheduleNodesWithMultipleUses when register
pressure is enabled, based on no evidence other than I don't think it
makes sense to have both enabled.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124853 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrew Trick 2011-02-04 03:18:17 +00:00
parent f827cd717b
commit 92e946630d
5 changed files with 176 additions and 120 deletions

View File

@ -248,6 +248,7 @@ namespace llvm {
unsigned NumSuccs; // # of SDep::Data sucss.
unsigned NumPredsLeft; // # of preds not scheduled.
unsigned NumSuccsLeft; // # of succs not scheduled.
unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use.
unsigned short Latency; // Node latency.
bool isCall : 1; // Is a function call.
bool isTwoAddress : 1; // Is a two-address instruction.
@ -276,7 +277,7 @@ namespace llvm {
SUnit(SDNode *node, unsigned nodenum)
: Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), Latency(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isCall(false), isTwoAddress(false), isCommutable(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
@ -290,7 +291,7 @@ namespace llvm {
SUnit(MachineInstr *instr, unsigned nodenum)
: Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), Latency(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isCall(false), isTwoAddress(false), isCommutable(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
@ -303,7 +304,7 @@ namespace llvm {
SUnit()
: Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), Latency(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isCall(false), isTwoAddress(false), isCommutable(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
@ -347,7 +348,7 @@ namespace llvm {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
void addPred(const SDep &D);
bool addPred(const SDep &D);
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
@ -442,6 +443,8 @@ namespace llvm {
bool hasReadyFilter() const { return HasReadyFilter; }
virtual bool tracksRegPressure() const { return false; }
virtual bool isReady(SUnit *) const {
assert(!HasReadyFilter && "The ready filter must override isReady()");
return true;

View File

@ -75,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
void SUnit::addPred(const SDep &D) {
bool SUnit::addPred(const SDep &D) {
// If this node already has this depenence, don't add a redundant one.
for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I)
if (*I == D)
return;
return false;
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
@ -106,6 +106,7 @@ void SUnit::addPred(const SDep &D) {
this->setDepthDirty();
N->setHeightDirty();
}
return true;
}
/// removePred - This removes the specified edge as a pred of the current
@ -285,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
dbgs() << " Height : " << Height << "\n";

View File

@ -706,6 +706,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
InitNumRegDefsLeft(LoadSU);
ComputeLatency(LoadSU);
}
@ -722,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
if (TID.isCommutable())
NewSU->isCommutable = true;
InitNumRegDefsLeft(NewSU);
ComputeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
@ -772,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
// Balance register pressure.
if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
&& !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
--NewSU->NumRegDefsLeft;
}
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep D = ChainSuccs[i];
@ -1436,6 +1444,8 @@ public:
SU->NodeQueueId = 0;
}
bool tracksRegPressure() const { return TracksRegPressure; }
void dumpRegPressure() const;
bool HighRegPressure(const SUnit *SU) const;
@ -1571,7 +1581,8 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
// Add pseudo dependency edges for two-address nodes.
AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
PrescheduleNodesWithMultipleUses();
if (!TracksRegPressure)
PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
}
@ -1642,64 +1653,20 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
if (I->isCtrl())
continue;
SUnit *PredSU = I->getSUnit();
// NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
// counts data deps. To be more precise, we could maintain a
// NumDataSuccsLeft count.
if (PredSU->NumSuccsLeft != PredSU->Succs.size()) {
DEBUG(dbgs() << " SU(" << PredSU->NodeNum << ") live across SU("
<< SU->NodeNum << ")\n");
// NumRegDefsLeft is zero when enough uses of this node have been scheduled
// to cover the number of registers defined (they are all live).
if (PredSU->NumRegDefsLeft == 0) {
continue;
}
const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyFromReg) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
}
continue;
}
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue;
if (POpc == TargetOpcode::EXTRACT_SUBREG) {
EVT VT = PN->getOperand(0).getValueType();
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance()) {
EVT VT = RegDefPos.GetValue();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
// Check if this increases register pressure of the specific register
// class to the point where it would cause spills.
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
continue;
} else if (POpc == TargetOpcode::INSERT_SUBREG ||
POpc == TargetOpcode::SUBREG_TO_REG) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
// Check if this increases register pressure of the specific register
// class to the point where it would cause spills.
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
continue;
}
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = PN->getValueType(i);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
if (RegPressure[RCId] >= RegLimit[RCId])
return true; // Reg pressure already high.
unsigned Cost = TLI->getRepRegClassCostFor(VT);
if (!PN->hasAnyUseOfValue(i))
continue;
// Check if this increases register pressure of the specific register
// class to the point where it would cause spills.
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
}
}
return false;
}
@ -1725,80 +1692,64 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
const SDNode *N = SU->getNode();
if (!N->isMachineOpcode()) {
if (N->getOpcode() != ISD::CopyToReg)
return;
} else {
unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF)
return;
}
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl())
continue;
SUnit *PredSU = I->getSUnit();
// NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
// counts data deps.
if (PredSU->NumSuccsLeft != PredSU->Succs.size())
continue;
const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyFromReg) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
// NumRegDefsLeft is zero when enough uses of this node have been scheduled
// to cover the number of registers defined (they are all live).
if (PredSU->NumRegDefsLeft == 0) {
continue;
}
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue;
if (POpc == TargetOpcode::EXTRACT_SUBREG) {
EVT VT = PN->getOperand(0).getValueType();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
} else if (POpc == TargetOpcode::INSERT_SUBREG ||
POpc == TargetOpcode::SUBREG_TO_REG) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
}
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = PN->getValueType(i);
if (!PN->hasAnyUseOfValue(i))
// FIXME: The ScheduleDAG currently loses information about which of a
// node's values is consumed by each dependence. Consequently, if the node
// defines multiple register classes, we don't know which to pressurize
// here. Instead the following loop consumes the register defs in an
// arbitrary order. At least it handles the common case of clustered loads
// to the same class. For precise liveness, each SDep needs to indicate the
// result number. But that tightly couples the ScheduleDAG with the
// SelectionDAG making updates tricky. A simpler hack would be to attach a
// value type or register class to SDep.
//
// The most important aspect of register tracking is balancing the increase
// here with the reduction further below. Note that this SU may use multiple
// defs in PredSU. The can't be determined here, but we've already
// compensated by reducing NumRegDefsLeft in PredSU during
// ScheduleDAGSDNodes::AddSchedEdges.
--PredSU->NumRegDefsLeft;
unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
if (SkipRegDefs)
continue;
EVT VT = RegDefPos.GetValue();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
break;
}
}
// Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
// may transfer data dependencies to CopyToReg.
if (SU->NumSuccs && N->isMachineOpcode()) {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = N->getValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
// Register pressure tracking is imprecise. This can happen.
RegPressure[RCId] = 0;
else
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
// We should have this assert, but there may be dead SDNodes that never
// materialize as SUnits, so they don't appear to generate liveness.
//assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
int SkipRegDefs = (int)SU->NumRegDefsLeft;
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
if (SkipRegDefs > 0)
continue;
EVT VT = RegDefPos.GetValue();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
// Register pressure tracking is imprecise. This can happen. But we try
// hard not to let it happen because it likely results in poor scheduling.
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
RegPressure[RCId] = 0;
}
else {
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
}
}
dumpRegPressure();
}
@ -2314,7 +2265,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
if (PredSU->NumSuccs == 1)
continue;
// Avoid prescheduling to copies from virtual registers, which don't behave
// like other nodes from the perspective of scheduling // heuristics.
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister

View File

@ -332,6 +332,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
// Compute NumRegDefsLeft. This must be done before AddSchedEdges.
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
@ -407,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
SU->addPred(dep);
if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
// Multiple register uses are combined in the same SUnit. For example,
// we could have a set of glued nodes with all their defs consumed by
// another set of glued nodes. Register pressure tracking sees this as
// a single use, so to keep pressure balanced we reduce the defs.
--OpSU->NumRegDefsLeft;
}
}
}
}
@ -426,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
AddSchedEdges();
}
// Initialize NumNodeDefs for the current Node's opcode.
void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
if (!Node->isMachineOpcode()) {
if (Node->getOpcode() == ISD::CopyFromReg)
NodeNumDefs = 1;
else
NodeNumDefs = 0;
return;
}
unsigned POpc = Node->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF) {
// No register need be allocated for this.
NodeNumDefs = 0;
return;
}
unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
// Some instructions define regs that are not represented in the selection DAG
// (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
DefIdx = 0;
}
// Construct a RegDefIter for this SUnit and find the first valid value.
ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
const ScheduleDAGSDNodes *SD)
: SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
InitNodeNumDefs();
Advance();
}
// Advance to the next valid value defined by the SUnit.
void ScheduleDAGSDNodes::RegDefIter::Advance() {
for (;Node;) { // Visit all glued nodes.
for (;DefIdx < NodeNumDefs; ++DefIdx) {
if (!Node->hasAnyUseOfValue(DefIdx))
continue;
if (Node->isMachineOpcode() &&
Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
// Propagate the incoming (full-register) type. I doubt it's needed.
ValueType = Node->getOperand(0).getValueType();
}
else {
ValueType = Node->getValueType(DefIdx);
}
++DefIdx;
return; // Found a normal regdef.
}
Node = Node->getGluedNode();
if (Node == NULL) {
return; // No values left to visit.
}
InitNodeNumDefs();
}
}
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
assert(SU->NumRegDefsLeft == 0 && "expect a new node");
for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
++SU->NumRegDefsLeft;
}
}
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {

View File

@ -20,7 +20,7 @@
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
///
///
/// Edges between SUnits are initially based on edges in the SelectionDAG,
/// and additional edges can be added by the schedulers as heuristics.
/// SDNodes such as Constants, Registers, and a few others that are not
@ -73,13 +73,17 @@ namespace llvm {
/// predecessors / successors info nor the temporary scheduling states.
///
SUnit *Clone(SUnit *N);
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
virtual void BuildSchedGraph(AliasAnalysis *AA);
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
void InitNumRegDefsLeft(SUnit *SU);
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
@ -106,6 +110,30 @@ namespace llvm {
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
/// RegDefIter - In place iteration over the values defined by an
/// SUnit. This does not need copies of the iterator or any other STLisms.
/// The iterator creates itself, rather than being provided by the SchedDAG.
class RegDefIter {
const ScheduleDAGSDNodes *SchedDAG;
const SDNode *Node;
unsigned DefIdx;
unsigned NodeNumDefs;
EVT ValueType;
public:
RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
bool IsValid() const { return Node != NULL; }
EVT GetValue() const {
assert(IsValid() && "bad iterator");
return ValueType;
}
void Advance();
private:
void InitNodeNumDefs();
};
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.