From e165a78551a91d8420cd8f074d97701e8788f8b5 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 11 May 2006 23:55:42 +0000 Subject: [PATCH] Refactor scheduler code. Move register-reduction list scheduler to a separate file. Added an initial implementation of top-down register pressure reduction list scheduler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28226 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ScheduleDAG.h | 111 +++ lib/CodeGen/SelectionDAG/ScheduleDAG.cpp | 251 +++++ lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 927 +----------------- .../SelectionDAG/ScheduleDAGRRList.cpp | 813 +++++++++++++++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 45 +- 5 files changed, 1215 insertions(+), 932 deletions(-) create mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index f72285e0f45..5f9236d4017 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -17,6 +17,8 @@ #include "llvm/CodeGen/SelectionDAG.h" +#include + namespace llvm { struct InstrStage; class MachineConstantPool; @@ -71,8 +73,88 @@ namespace llvm { } }; + /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or + /// a group of nodes flagged together. + struct SUnit { + SDNode *Node; // Representative node. + std::vector FlaggedNodes; // All nodes flagged to Node. + + // Preds/Succs - The SUnits before/after us in the graph. The boolean value + // is true if the edge is a token chain edge, false if it is a value edge. + std::set > Preds; // All sunit predecessors. + std::set > Succs; // All sunit successors. + + short NumPreds; // # of preds. + short NumSuccs; // # of sucss. + short NumPredsLeft; // # of preds not scheduled. + short NumSuccsLeft; // # of succs not scheduled. + short NumChainPredsLeft; // # of chain preds not scheduled. + short NumChainSuccsLeft; // # of chain succs not scheduled. 
+ bool isTwoAddress : 1; // Is a two-address instruction. + bool isDefNUseOperand : 1; // Is a def&use operand. + bool isPending : 1; // True once pending. + bool isAvailable : 1; // True once available. + bool isScheduled : 1; // True once scheduled. + unsigned short Latency; // Node latency. + unsigned CycleBound; // Upper/lower cycle to be scheduled at. + unsigned Cycle; // Once scheduled, the cycle of the op. + unsigned Depth; // Node depth; + unsigned Height; // Node height; + unsigned NodeNum; // Entry # of node in the node vector. + + SUnit(SDNode *node, unsigned nodenum) + : Node(node), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), + NumChainPredsLeft(0), NumChainSuccsLeft(0), + isTwoAddress(false), isDefNUseOperand(false), + isPending(false), isAvailable(false), isScheduled(false), + Latency(0), CycleBound(0), Cycle(0), Depth(0), Height(0), + NodeNum(nodenum) {} + + void dump(const SelectionDAG *G) const; + void dumpAll(const SelectionDAG *G) const; + }; + + //===--------------------------------------------------------------------===// + /// SchedulingPriorityQueue - This interface is used to plug different + /// priorities computation algorithms into the list scheduler. It implements + /// the interface of a standard priority queue, where nodes are inserted in + /// arbitrary order and returned in priority order. The computation of the + /// priority and the representation of the queue are totally up to the + /// implementation to decide. + /// + class SchedulingPriorityQueue { + public: + virtual ~SchedulingPriorityQueue() {} + + virtual void initNodes(const std::vector &SUnits) = 0; + virtual void releaseState() = 0; + + virtual bool empty() const = 0; + virtual void push(SUnit *U) = 0; + + virtual void push_all(const std::vector &Nodes) = 0; + virtual SUnit *pop() = 0; + + /// ScheduledNode - As each node is scheduled, this method is invoked. 
This + /// allows the priority function to adjust the priority of node that have + /// already been emitted. + virtual void ScheduledNode(SUnit *Node) {} + }; + class ScheduleDAG { public: + + // Scheduling heuristics + enum SchedHeuristics { + defaultScheduling, // Let the target specify its preference. + noScheduling, // No scheduling, emit breadth first sequence. + simpleScheduling, // Two pass, min. critical path, max. utilization. + simpleNoItinScheduling, // Same as above exact using generic latency. + listSchedulingBURR, // Bottom-up reg reduction list scheduling. + listSchedulingTDRR, // Top-down reg reduction list scheduling. + listSchedulingTD // Top-down list scheduler. + }; + SelectionDAG &DAG; // DAG of the current basic block MachineBasicBlock *BB; // Current basic block const TargetMachine &TM; // Target processor @@ -80,6 +162,10 @@ namespace llvm { const MRegisterInfo *MRI; // Target processor register info SSARegMap *RegMap; // Virtual/real register map MachineConstantPool *ConstPool; // Target constant pool + std::vector Sequence; // The schedule. Null SUnit*'s represent + // noop instructions. + std::map SUnitMap; // SDNode to SUnit mapping (n -> 1). + std::vector SUnits; // The scheduling units. ScheduleDAG(SelectionDAG &dag, MachineBasicBlock *bb, const TargetMachine &tm) @@ -105,6 +191,23 @@ namespace llvm { return false; } + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *NewSUnit(SDNode *N) { + SUnits.push_back(SUnit(N, SUnits.size())); + return &SUnits.back(); + } + + /// BuildSchedUnits - Build SUnits from the selection dag that we are input. + /// This SUnit graph is similar to the SelectionDAG, but represents flagged + /// together nodes with a single SUnit. + void BuildSchedUnits(); + + /// CalculateDepths, CalculateHeights - Calculate node depth / height. + /// + void CalculateDepths(); + void CalculateHeights(); + /// EmitNode - Generate machine code for an node and needed dependencies. 
/// VRBaseMap contains, for each already emitted node, the first virtual /// register number for the results of the node. @@ -115,6 +218,9 @@ namespace llvm { /// void EmitNoop(); + void EmitSchedule(); + + void dumpSchedule() const; /// Schedule - Order nodes according to selected style. /// @@ -138,6 +244,11 @@ namespace llvm { ScheduleDAG* createBURRListDAGScheduler(SelectionDAG &DAG, MachineBasicBlock *BB); + /// createTDRRListDAGScheduler - This creates a top down register usage + /// reduction list scheduler. + ScheduleDAG* createTDRRListDAGScheduler(SelectionDAG &DAG, + MachineBasicBlock *BB); + /// createTDListDAGScheduler - This creates a top-down list scheduler with /// the specified hazard recognizer. This takes ownership of the hazard /// recognizer and deletes it when done. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp index f9749903a79..4a9b9c7f04e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" @@ -20,10 +21,185 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" +#include using namespace llvm; +/// BuildSchedUnits - Build SUnits from the selection dag that we are input. +/// This SUnit graph is similar to the SelectionDAG, but represents flagged +/// together nodes with a single SUnit. +void ScheduleDAG::BuildSchedUnits() { + // Reserve entries in the vector for each of the SUnits we are creating. This + // ensure that reallocation of the vector won't happen, so SUnit*'s won't get + // invalidated. 
+ SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end())); + + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + + for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(), + E = DAG.allnodes_end(); NI != E; ++NI) { + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. + continue; + + // If this node has already been processed, stop now. + if (SUnitMap[NI]) continue; + + SUnit *NodeSUnit = NewSUnit(NI); + + // See if anything is flagged to this node, if so, add them to flagged + // nodes. Nodes can have at most one flag input and one flag output. Flags + // are required the be the last operand and result of a node. + + // Scan up, adding flagged preds to FlaggedNodes. + SDNode *N = NI; + while (N->getNumOperands() && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { + N = N->getOperand(N->getNumOperands()-1).Val; + NodeSUnit->FlaggedNodes.push_back(N); + SUnitMap[N] = NodeSUnit; + } + + // Scan down, adding this node and any flagged succs to FlaggedNodes if they + // have a user of the flag operand. + N = NI; + while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { + SDOperand FlagVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Flag result. + bool HasFlagUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + if (FlagVal.isOperand(*UI)) { + HasFlagUse = true; + NodeSUnit->FlaggedNodes.push_back(N); + SUnitMap[N] = NodeSUnit; + N = *UI; + break; + } + if (!HasFlagUse) break; + } + + // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node. + // Update the SUnit + NodeSUnit->Node = N; + SUnitMap[N] = NodeSUnit; + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + if (InstrItins.isEmpty()) { + // No latency information. 
+ NodeSUnit->Latency = 1; + } else { + NodeSUnit->Latency = 0; + if (N->isTargetOpcode()) { + unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode()); + InstrStage *S = InstrItins.begin(SchedClass); + InstrStage *E = InstrItins.end(SchedClass); + for (; S != E; ++S) + NodeSUnit->Latency += S->Cycles; + } + for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) { + SDNode *FNode = NodeSUnit->FlaggedNodes[i]; + if (FNode->isTargetOpcode()) { + unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode()); + InstrStage *S = InstrItins.begin(SchedClass); + InstrStage *E = InstrItins.end(SchedClass); + for (; S != E; ++S) + NodeSUnit->Latency += S->Cycles; + } + } + } + } + + // Pass 2: add the preds, succs, etc. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + SUnit *SU = &SUnits[su]; + SDNode *MainNode = SU->Node; + + if (MainNode->isTargetOpcode()) { + unsigned Opc = MainNode->getTargetOpcode(); + if (TII->isTwoAddrInstr(Opc)) { + SU->isTwoAddress = true; + SDNode *OpN = MainNode->getOperand(0).Val; + SUnit *OpSU = SUnitMap[OpN]; + if (OpSU) + OpSU->isDefNUseOperand = true; + } + } + + // Find all predecessors and successors of the group. + // Temporarily add N to make code simpler. + SU->FlaggedNodes.push_back(MainNode); + + for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) { + SDNode *N = SU->FlaggedNodes[n]; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *OpN = N->getOperand(i).Val; + if (isPassiveNode(OpN)) continue; // Not scheduled. + SUnit *OpSU = SUnitMap[OpN]; + assert(OpSU && "Node has no SUnit!"); + if (OpSU == SU) continue; // In the same group. 
+ + MVT::ValueType OpVT = N->getOperand(i).getValueType(); + assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + bool isChain = OpVT == MVT::Other; + + if (SU->Preds.insert(std::make_pair(OpSU, isChain)).second) { + if (!isChain) { + SU->NumPreds++; + SU->NumPredsLeft++; + } else { + SU->NumChainPredsLeft++; + } + } + if (OpSU->Succs.insert(std::make_pair(SU, isChain)).second) { + if (!isChain) { + OpSU->NumSuccs++; + OpSU->NumSuccsLeft++; + } else { + OpSU->NumChainSuccsLeft++; + } + } + } + } + + // Remove MainNode from FlaggedNodes again. + SU->FlaggedNodes.pop_back(); + } + + return; +} + +static void CalculateDepths(SUnit *SU, unsigned Depth) { + if (Depth > SU->Depth) SU->Depth = Depth; + for (std::set >::iterator I = SU->Succs.begin(), + E = SU->Succs.end(); I != E; ++I) + CalculateDepths(I->first, Depth+1); +} + +void ScheduleDAG::CalculateDepths() { + SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; + ::CalculateDepths(Entry, 0U); + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) + if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { + ::CalculateDepths(&SUnits[i], 0U); + } +} + +static void CalculateHeights(SUnit *SU, unsigned Height) { + if (Height > SU->Height) SU->Height = Height; + for (std::set >::iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) + CalculateHeights(I->first, Height+1); +} +void ScheduleDAG::CalculateHeights() { + SUnit *Root = SUnitMap[DAG.getRoot().Val]; + ::CalculateHeights(Root, 0U); +} + /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands (which do /// not go into the machine instrs.) @@ -348,6 +524,32 @@ void ScheduleDAG::EmitNoop() { TII->insertNoop(*BB, BB->end()); } +/// EmitSchedule - Emit the machine code in scheduled order. 
+void ScheduleDAG::EmitSchedule() { + std::map VRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) { + for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++) + EmitNode(SU->FlaggedNodes[j], VRBaseMap); + EmitNode(SU->Node, VRBaseMap); + } else { + // Null SUnit* is a noop. + EmitNoop(); + } + } +} + +/// dump - dump the schedule. +void ScheduleDAG::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(&DAG); + else + std::cerr << "**** NOOP ****\n"; + } +} + + /// Run - perform scheduling. /// MachineBasicBlock *ScheduleDAG::Run() { @@ -360,4 +562,53 @@ MachineBasicBlock *ScheduleDAG::Run() { return BB; } +/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or +/// a group of nodes flagged together. +void SUnit::dump(const SelectionDAG *G) const { + std::cerr << "SU(" << NodeNum << "): "; + Node->dump(G); + std::cerr << "\n"; + if (FlaggedNodes.size() != 0) { + for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) { + std::cerr << " "; + FlaggedNodes[i]->dump(G); + std::cerr << "\n"; + } + } +} +void SUnit::dumpAll(const SelectionDAG *G) const { + dump(G); + + std::cerr << " # preds left : " << NumPredsLeft << "\n"; + std::cerr << " # succs left : " << NumSuccsLeft << "\n"; + std::cerr << " # chain preds left : " << NumChainPredsLeft << "\n"; + std::cerr << " # chain succs left : " << NumChainSuccsLeft << "\n"; + std::cerr << " Latency : " << Latency << "\n"; + std::cerr << " Depth : " << Depth << "\n"; + std::cerr << " Height : " << Height << "\n"; + + if (Preds.size() != 0) { + std::cerr << " Predecessors:\n"; + for (std::set >::const_iterator I = Preds.begin(), + E = Preds.end(); I != E; ++I) { + if (I->second) + std::cerr << " ch "; + else + std::cerr << " val "; + I->first->dump(G); + } + } + if (Succs.size() != 0) { + std::cerr << " Successors:\n"; + for (std::set >::const_iterator I = Succs.begin(), + E 
= Succs.end(); I != E; ++I) { + if (I->second) + std::cerr << " ch "; + else + std::cerr << " val "; + I->first->dump(G); + } + } + std::cerr << "\n"; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 7e87b525a07..34136d847ce 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// // -// This implements bottom-up and top-down list schedulers, using standard -// algorithms. The basic approach uses a priority queue of available nodes to -// schedule. One at a time, nodes are taken from the priority queue (thus in -// priority order), checked for legality to schedule, and emitted if legal. +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. // // Nodes may not be legal to schedule either due to structural hazards (e.g. // pipeline or resource constraints) or because an input to the instruction has @@ -29,157 +29,20 @@ #include #include #include -#include -#include -#include "llvm/Support/CommandLine.h" using namespace llvm; -namespace { - cl::opt SchedVertically("sched-vertically", cl::Hidden); - cl::opt SchedLowerDefNUse("sched-lower-defnuse", cl::Hidden); -} - namespace { Statistic<> NumNoops ("scheduler", "Number of noops inserted"); Statistic<> NumStalls("scheduler", "Number of pipeline stalls"); - - /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or - /// a group of nodes flagged together. - struct SUnit { - SDNode *Node; // Representative node. - std::vector FlaggedNodes; // All nodes flagged to Node. - - // Preds/Succs - The SUnits before/after us in the graph. 
The boolean value - // is true if the edge is a token chain edge, false if it is a value edge. - std::set > Preds; // All sunit predecessors. - std::set > Succs; // All sunit successors. - - short NumPredsLeft; // # of preds not scheduled. - short NumSuccsLeft; // # of succs not scheduled. - short NumChainPredsLeft; // # of chain preds not scheduled. - short NumChainSuccsLeft; // # of chain succs not scheduled. - bool isTwoAddress : 1; // Is a two-address instruction. - bool isDefNUseOperand : 1; // Is a def&use operand. - bool isPending : 1; // True once pending. - bool isAvailable : 1; // True once available. - bool isScheduled : 1; // True once scheduled. - unsigned short Latency; // Node latency. - unsigned CycleBound; // Upper/lower cycle to be scheduled at. - unsigned Cycle; // Once scheduled, the cycle of the op. - unsigned NodeNum; // Entry # of node in the node vector. - - SUnit(SDNode *node, unsigned nodenum) - : Node(node), NumPredsLeft(0), NumSuccsLeft(0), - NumChainPredsLeft(0), NumChainSuccsLeft(0), - isTwoAddress(false), isDefNUseOperand(false), - isPending(false), isAvailable(false), isScheduled(false), - Latency(0), CycleBound(0), Cycle(0), NodeNum(nodenum) {} - - void dump(const SelectionDAG *G) const; - void dumpAll(const SelectionDAG *G) const; - }; } -void SUnit::dump(const SelectionDAG *G) const { - std::cerr << "SU(" << NodeNum << "): "; - Node->dump(G); - std::cerr << "\n"; - if (FlaggedNodes.size() != 0) { - for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) { - std::cerr << " "; - FlaggedNodes[i]->dump(G); - std::cerr << "\n"; - } - } -} - -void SUnit::dumpAll(const SelectionDAG *G) const { - dump(G); - - std::cerr << " # preds left : " << NumPredsLeft << "\n"; - std::cerr << " # succs left : " << NumSuccsLeft << "\n"; - std::cerr << " # chain preds left : " << NumChainPredsLeft << "\n"; - std::cerr << " # chain succs left : " << NumChainSuccsLeft << "\n"; - std::cerr << " Latency : " << Latency << "\n"; - - if (Preds.size() != 0) 
{ - std::cerr << " Predecessors:\n"; - for (std::set >::const_iterator I = Preds.begin(), - E = Preds.end(); I != E; ++I) { - if (I->second) - std::cerr << " ch "; - else - std::cerr << " val "; - I->first->dump(G); - } - } - if (Succs.size() != 0) { - std::cerr << " Successors:\n"; - for (std::set >::const_iterator I = Succs.begin(), - E = Succs.end(); I != E; ++I) { - if (I->second) - std::cerr << " ch "; - else - std::cerr << " val "; - I->first->dump(G); - } - } - std::cerr << "\n"; -} - -//===----------------------------------------------------------------------===// -/// SchedulingPriorityQueue - This interface is used to plug different -/// priorities computation algorithms into the list scheduler. It implements the -/// interface of a standard priority queue, where nodes are inserted in -/// arbitrary order and returned in priority order. The computation of the -/// priority and the representation of the queue are totally up to the -/// implementation to decide. -/// -namespace { -class SchedulingPriorityQueue { -public: - virtual ~SchedulingPriorityQueue() {} - - virtual void initNodes(const std::vector &SUnits) = 0; - virtual void releaseState() = 0; - - virtual bool empty() const = 0; - virtual void push(SUnit *U) = 0; - - virtual void push_all(const std::vector &Nodes) = 0; - virtual SUnit *pop() = 0; - - virtual void RemoveFromPriorityQueue(SUnit *SU) = 0; - - /// ScheduledNode - As each node is scheduled, this method is invoked. This - /// allows the priority function to adjust the priority of node that have - /// already been emitted. - virtual void ScheduledNode(SUnit *Node) {} -}; -} - - - namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGList - The actual list scheduler implementation. This supports -/// both top-down and bottom-up scheduling. +/// top-down scheduling. /// class ScheduleDAGList : public ScheduleDAG { private: - // SDNode to SUnit mapping (many to one). 
- std::map SUnitMap; - - // The schedule. Null SUnit*'s represent noop instructions. - std::vector Sequence; - - // The scheduling units. - std::vector SUnits; - - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// AvailableQueue - The priority queue to use for the available SUnits. /// SchedulingPriorityQueue *AvailableQueue; @@ -194,20 +57,12 @@ private: /// HazardRec - The hazard recognizer to use. HazardRecognizer *HazardRec; - /// OpenNodes - Nodes with open live ranges, i.e. predecessors or successors - /// of scheduled nodes which are not themselves scheduled. - std::map > OpenNodes; - - /// RegPressureLimits - Keep track of upper limit of register pressure for - /// each register class that allows the scheduler to go into vertical mode. - std::map RegPressureLimits; - public: ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb, - const TargetMachine &tm, bool isbottomup, + const TargetMachine &tm, SchedulingPriorityQueue *availqueue, HazardRecognizer *HR) - : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), + : ScheduleDAG(dag, bb, tm), AvailableQueue(availqueue), HazardRec(HR) { } @@ -218,202 +73,16 @@ public: void Schedule(); - void dumpSchedule() const; - private: - SUnit *NewSUnit(SDNode *N); - void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle); void ReleaseSucc(SUnit *SuccSU, bool isChain); - void ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle, bool Veritical=true); - void ScheduleVertically(SUnit *SU, unsigned& CurCycle); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); - void ListScheduleBottomUp(); - void BuildSchedUnits(); - void EmitSchedule(); }; } // end anonymous namespace HazardRecognizer::~HazardRecognizer() {} -/// NewSUnit - Creates a new SUnit and return a ptr to it. 
-SUnit *ScheduleDAGList::NewSUnit(SDNode *N) { - SUnits.push_back(SUnit(N, SUnits.size())); - return &SUnits.back(); -} - -/// BuildSchedUnits - Build SUnits from the selection dag that we are input. -/// This SUnit graph is similar to the SelectionDAG, but represents flagged -/// together nodes with a single SUnit. -void ScheduleDAGList::BuildSchedUnits() { - // Reserve entries in the vector for each of the SUnits we are creating. This - // ensure that reallocation of the vector won't happen, so SUnit*'s won't get - // invalidated. - SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end())); - - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - - for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(), - E = DAG.allnodes_end(); NI != E; ++NI) { - if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. - continue; - - // If this node has already been processed, stop now. - if (SUnitMap[NI]) continue; - - SUnit *NodeSUnit = NewSUnit(NI); - - // See if anything is flagged to this node, if so, add them to flagged - // nodes. Nodes can have at most one flag input and one flag output. Flags - // are required the be the last operand and result of a node. - - // Scan up, adding flagged preds to FlaggedNodes. - SDNode *N = NI; - while (N->getNumOperands() && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { - N = N->getOperand(N->getNumOperands()-1).Val; - NodeSUnit->FlaggedNodes.push_back(N); - SUnitMap[N] = NodeSUnit; - } - - // Scan down, adding this node and any flagged succs to FlaggedNodes if they - // have a user of the flag operand. - N = NI; - while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { - SDOperand FlagVal(N, N->getNumValues()-1); - - // There are either zero or one users of the Flag result. 
- bool HasFlagUse = false; - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); - UI != E; ++UI) - if (FlagVal.isOperand(*UI)) { - HasFlagUse = true; - NodeSUnit->FlaggedNodes.push_back(N); - SUnitMap[N] = NodeSUnit; - N = *UI; - break; - } - if (!HasFlagUse) break; - } - - // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node. - // Update the SUnit - NodeSUnit->Node = N; - SUnitMap[N] = NodeSUnit; - - // Compute the latency for the node. We use the sum of the latencies for - // all nodes flagged together into this SUnit. - if (InstrItins.isEmpty()) { - // No latency information. - NodeSUnit->Latency = 1; - } else { - NodeSUnit->Latency = 0; - if (N->isTargetOpcode()) { - unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode()); - InstrStage *S = InstrItins.begin(SchedClass); - InstrStage *E = InstrItins.end(SchedClass); - for (; S != E; ++S) - NodeSUnit->Latency += S->Cycles; - } - for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) { - SDNode *FNode = NodeSUnit->FlaggedNodes[i]; - if (FNode->isTargetOpcode()) { - unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode()); - InstrStage *S = InstrItins.begin(SchedClass); - InstrStage *E = InstrItins.end(SchedClass); - for (; S != E; ++S) - NodeSUnit->Latency += S->Cycles; - } - } - } - } - - // Pass 2: add the preds, succs, etc. - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { - SUnit *SU = &SUnits[su]; - SDNode *MainNode = SU->Node; - - if (MainNode->isTargetOpcode()) { - unsigned Opc = MainNode->getTargetOpcode(); - if (TII->isTwoAddrInstr(Opc)) { - SU->isTwoAddress = true; - SDNode *OpN = MainNode->getOperand(0).Val; - SUnit *OpSU = SUnitMap[OpN]; - if (OpSU) - OpSU->isDefNUseOperand = true; - } - } - - // Find all predecessors and successors of the group. - // Temporarily add N to make code simpler. 
- SU->FlaggedNodes.push_back(MainNode); - - for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) { - SDNode *N = SU->FlaggedNodes[n]; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDNode *OpN = N->getOperand(i).Val; - if (isPassiveNode(OpN)) continue; // Not scheduled. - SUnit *OpSU = SUnitMap[OpN]; - assert(OpSU && "Node has no SUnit!"); - if (OpSU == SU) continue; // In the same group. - - MVT::ValueType OpVT = N->getOperand(i).getValueType(); - assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); - bool isChain = OpVT == MVT::Other; - - if (SU->Preds.insert(std::make_pair(OpSU, isChain)).second) { - if (!isChain) { - SU->NumPredsLeft++; - } else { - SU->NumChainPredsLeft++; - } - } - if (OpSU->Succs.insert(std::make_pair(SU, isChain)).second) { - if (!isChain) { - OpSU->NumSuccsLeft++; - } else { - OpSU->NumChainSuccsLeft++; - } - } - } - } - - // Remove MainNode from FlaggedNodes again. - SU->FlaggedNodes.pop_back(); - } - - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(&DAG)); - return; -} - -/// EmitSchedule - Emit the machine code in scheduled order. -void ScheduleDAGList::EmitSchedule() { - std::map VRBaseMap; - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) { - for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++) - EmitNode(SU->FlaggedNodes[j], VRBaseMap); - EmitNode(SU->Node, VRBaseMap); - } else { - // Null SUnit* is a noop. - EmitNoop(); - } - } -} - -/// dump - dump the schedule. -void ScheduleDAGList::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(&DAG); - else - std::cerr << "**** NOOP ****\n"; - } -} - /// Schedule - Schedule the DAG using list scheduling. 
void ScheduleDAGList::Schedule() { DEBUG(std::cerr << "********** List Scheduling **********\n"); @@ -423,11 +92,7 @@ void ScheduleDAGList::Schedule() { AvailableQueue->initNodes(SUnits); - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(); - else - ListScheduleTopDown(); + ListScheduleTopDown(); AvailableQueue->releaseState(); @@ -439,273 +104,6 @@ void ScheduleDAGList::Schedule() { EmitSchedule(); } -//===----------------------------------------------------------------------===// -// Bottom-Up Scheduling -//===----------------------------------------------------------------------===// - -static const TargetRegisterClass *getRegClass(SUnit *SU, - const TargetInstrInfo *TII, - const MRegisterInfo *MRI, - SSARegMap *RegMap) { - if (SU->Node->isTargetOpcode()) { - unsigned Opc = SU->Node->getTargetOpcode(); - const TargetInstrDescriptor &II = TII->get(Opc); - return II.OpInfo->RegClass; - } else { - assert(SU->Node->getOpcode() == ISD::CopyFromReg); - unsigned SrcReg = cast(SU->Node->getOperand(1))->getReg(); - if (MRegisterInfo::isVirtualRegister(SrcReg)) - return RegMap->getRegClass(SrcReg); - else { - for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(), - E = MRI->regclass_end(); I != E; ++I) - if ((*I)->hasType(SU->Node->getValueType(0)) && - (*I)->contains(SrcReg)) - return *I; - assert(false && "Couldn't find register class for reg copy!"); - } - return NULL; - } -} - -static unsigned getNumResults(SUnit *SU) { - unsigned NumResults = 0; - for (unsigned i = 0, e = SU->Node->getNumValues(); i != e; ++i) { - MVT::ValueType VT = SU->Node->getValueType(i); - if (VT != MVT::Other && VT != MVT::Flag) - NumResults++; - } - return NumResults; -} - -/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to -/// the Available queue is the count reaches zero. Also update its cycle bound. 
-void ScheduleDAGList::ReleasePred(SUnit *PredSU, bool isChain, - unsigned CurCycle) { - // FIXME: the distance between two nodes is not always == the predecessor's - // latency. For example, the reader can very well read the register written - // by the predecessor later than the issue cycle. It also depends on the - // interrupt model (drain vs. freeze). - PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); - - if (!isChain) - PredSU->NumSuccsLeft--; - else - PredSU->NumChainSuccsLeft--; - -#ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) { - std::cerr << "*** List scheduling failed! ***\n"; - PredSU->dump(&DAG); - std::cerr << " has been released too many times!\n"; - assert(0); - } -#endif - - if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) { - // EntryToken has to go last! Special case it here. - if (PredSU->Node->getOpcode() != ISD::EntryToken) { - PredSU->isAvailable = true; - AvailableQueue->push(PredSU); - } - } - - if (getNumResults(PredSU) > 0) { - const TargetRegisterClass *RegClass = getRegClass(PredSU, TII, MRI, RegMap); - OpenNodes[RegClass].insert(PredSU); - } -} - -/// SharesOperandWithTwoAddr - Check if there is a unscheduled two-address node -/// with which SU shares an operand. If so, returns the node. 
-static SUnit *SharesOperandWithTwoAddr(SUnit *SU) { - assert(!SU->isTwoAddress && "Node cannot be two-address op"); - for (std::set >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; - SUnit *PredSU = I->first; - for (std::set >::iterator II = - PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { - if (II->second) continue; - SUnit *SSU = II->first; - if (SSU->isTwoAddress && !SSU->isScheduled) { - return SSU; - } - } - } - return NULL; -} - -static bool isFloater(const SUnit *SU) { - unsigned Opc = SU->Node->getOpcode(); - return (Opc != ISD::CopyFromReg && SU->NumPredsLeft == 0); -} - -static bool isSimpleFloaterUse(const SUnit *SU) { - unsigned NumOps = 0; - for (std::set >::const_iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; - if (++NumOps > 1) - return false; - if (!isFloater(I->first)) - return false; - } - return true; -} - -/// ScheduleVertically - Schedule vertically. That is, follow up the D&U chain -/// (of two-address code) and schedule floaters aggressively. -void ScheduleDAGList::ScheduleVertically(SUnit *SU, unsigned& CurCycle) { - // Try scheduling Def&Use operand if register pressure is low. - const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap); - unsigned Pressure = OpenNodes[RegClass].size(); - unsigned Limit = RegPressureLimits[RegClass]; - - // See if we can schedule any predecessor that takes no registers. 
- for (std::set >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; - - SUnit *PredSU = I->first; - if (!PredSU->isAvailable || PredSU->isScheduled) - continue; - - if (isFloater(PredSU)) { - DEBUG(std::cerr<<"*** Scheduling floater\n"); - AvailableQueue->RemoveFromPriorityQueue(PredSU); - ScheduleNodeBottomUp(PredSU, CurCycle, false); - } - } - - SUnit *DUSU = NULL; - if (SU->isTwoAddress && Pressure < Limit) { - DUSU = SUnitMap[SU->Node->getOperand(0).Val]; - if (!DUSU->isAvailable || DUSU->isScheduled) - DUSU = NULL; - else if (!DUSU->isTwoAddress) { - SUnit *SSU = SharesOperandWithTwoAddr(DUSU); - if (SSU && SSU->isAvailable) { - AvailableQueue->RemoveFromPriorityQueue(SSU); - ScheduleNodeBottomUp(SSU, CurCycle, false); - Pressure = OpenNodes[RegClass].size(); - if (Pressure >= Limit) - DUSU = NULL; - } - } - } - - if (DUSU) { - DEBUG(std::cerr<<"*** Low register pressure: scheduling D&U operand\n"); - AvailableQueue->RemoveFromPriorityQueue(DUSU); - ScheduleNodeBottomUp(DUSU, CurCycle, false); - Pressure = OpenNodes[RegClass].size(); - ScheduleVertically(DUSU, CurCycle); - } -} - -/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending -/// count of its predecessors. If a predecessor pending count is zero, add it to -/// the Available queue. 
-void ScheduleDAGList::ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle, - bool Vertical) { - DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(&DAG)); - SU->Cycle = CurCycle; - - AvailableQueue->ScheduledNode(SU); - Sequence.push_back(SU); - - // Bottom up: release predecessors - for (std::set >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) - ReleasePred(I->first, I->second, CurCycle); - SU->isScheduled = true; - CurCycle++; - - if (getNumResults(SU) != 0) { - const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap); - OpenNodes[RegClass].erase(SU); - - if (SchedVertically && Vertical) - ScheduleVertically(SU, CurCycle); - } -} - -/// isReady - True if node's lower cycle bound is less or equal to the current -/// scheduling cycle. Always true if all nodes have uniform latency 1. -static inline bool isReady(SUnit *SU, unsigned CurCycle) { - return SU->CycleBound <= CurCycle; -} - -/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up -/// schedulers. -void ScheduleDAGList::ListScheduleBottomUp() { - // Determine rough register pressure limit. - for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(), - E = MRI->regclass_end(); RCI != E; ++RCI) { - const TargetRegisterClass *RC = *RCI; - unsigned Limit = RC->getNumRegs(); - Limit = (Limit > 2) ? Limit - 2 : 0; - std::map::iterator RPI = - RegPressureLimits.find(RC); - if (RPI == RegPressureLimits.end()) - RegPressureLimits[RC] = Limit; - else { - unsigned &OldLimit = RegPressureLimits[RC]; - if (Limit < OldLimit) - OldLimit = Limit; - } - } - - unsigned CurCycle = 0; - // Add root to Available queue. - AvailableQueue->push(SUnitMap[DAG.getRoot().Val]); - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. 
- std::vector NotReady; - SUnit *CurNode = NULL; - while (!AvailableQueue->empty()) { - SUnit *CurNode = AvailableQueue->pop(); - while (!isReady(CurNode, CurCycle)) { - NotReady.push_back(CurNode); - CurNode = AvailableQueue->pop(); - } - - // Add the nodes that aren't ready back onto the available list. - AvailableQueue->push_all(NotReady); - NotReady.clear(); - - ScheduleNodeBottomUp(CurNode, CurCycle); - } - - // Add entry node last - if (DAG.getEntryNode().Val != DAG.getRoot().Val) { - SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; - Sequence.push_back(Entry); - } - - // Reverse the order if it is bottom up. - std::reverse(Sequence.begin(), Sequence.end()); - - -#ifndef NDEBUG - // Verify that all SUnits were scheduled. - bool AnyNotSched = false; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { - if (!AnyNotSched) - std::cerr << "*** List scheduling failed! ***\n"; - SUnits[i].dump(&DAG); - std::cerr << "has not been scheduled!\n"; - AnyNotSched = true; - } - } - assert(!AnyNotSched); -#endif -} - //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -884,284 +282,6 @@ void ScheduleDAGList::ListScheduleTopDown() { #endif } -//===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Implementation -//===----------------------------------------------------------------------===// -// -// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers -// to reduce register pressure. -// -namespace { - template - class RegReductionPriorityQueue; - - /// Sorting functions for the Available queue. 
- struct ls_rr_sort : public std::binary_function { - RegReductionPriorityQueue *SPQ; - ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} - ls_rr_sort(const ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; - }; -} // end anonymous namespace - -namespace { - template - class RegReductionPriorityQueue : public SchedulingPriorityQueue { - // SUnits - The SUnits for the current graph. - const std::vector *SUnits; - - // SethiUllmanNumbers - The SethiUllman number for each node. - std::vector SethiUllmanNumbers; - - std::priority_queue, SF> Queue; - public: - RegReductionPriorityQueue() : - Queue(ls_rr_sort(this)) {} - - void initNodes(const std::vector &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - if (SchedLowerDefNUse) - AddPseudoTwoAddrDeps(); - // Calculate node priorities. - CalculatePriorities(); - } - void releaseState() { - SUnits = 0; - SethiUllmanNumbers.clear(); - } - - int getSethiUllmanNumber(unsigned NodeNum) const { - assert(NodeNum < SethiUllmanNumbers.size()); - return SethiUllmanNumbers[NodeNum]; - } - - bool empty() const { return Queue.empty(); } - - void push(SUnit *U) { - Queue.push(U); - } - void push_all(const std::vector &Nodes) { - for (unsigned i = 0, e = Nodes.size(); i != e; ++i) - Queue.push(Nodes[i]); - } - - SUnit *pop() { - SUnit *V = Queue.top(); - Queue.pop(); - return V; - } - - /// RemoveFromPriorityQueue - This is a really inefficient way to remove a - /// node from a priority queue. We should roll our own heap to make this - /// better or something. - void RemoveFromPriorityQueue(SUnit *SU) { - std::vector Temp; - - assert(!Queue.empty() && "Not in queue!"); - while (Queue.top() != SU) { - Temp.push_back(Queue.top()); - Queue.pop(); - assert(!Queue.empty() && "Not in queue!"); - } - - // Remove the node from the PQ. - Queue.pop(); - - // Add all the other nodes back. 
- for (unsigned i = 0, e = Temp.size(); i != e; ++i) - Queue.push(Temp[i]); - } - - private: - void AddPseudoTwoAddrDeps(); - void CalculatePriorities(); - int CalcNodePriority(const SUnit *SU); - }; -} - -bool ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - unsigned LeftNum = left->NodeNum; - unsigned RightNum = right->NodeNum; - bool LIsTarget = left->Node->isTargetOpcode(); - bool RIsTarget = right->Node->isTargetOpcode(); - int LPriority = SPQ->getSethiUllmanNumber(LeftNum); - int RPriority = SPQ->getSethiUllmanNumber(RightNum); - bool LIsFloater = LIsTarget && (LPriority == 1 || LPriority == 0); - bool RIsFloater = RIsTarget && (RPriority == 1 || RPriority == 0); - int LBonus = 0; - int RBonus = 0; - - // Schedule floaters (e.g. load from some constant address) and those nodes - // with a single predecessor each first. They maintain / reduce register - // pressure. - if (LIsFloater) - LBonus += 2; - if (RIsFloater) - RBonus += 2; - - if (!SchedLowerDefNUse) { - // Special tie breaker: if two nodes share a operand, the one that use it - // as a def&use operand is preferred. 
- if (LIsTarget && RIsTarget) { - if (left->isTwoAddress && !right->isTwoAddress) { - SDNode *DUNode = left->Node->getOperand(0).Val; - if (DUNode->isOperand(right->Node)) - LBonus += 2; - } - if (!left->isTwoAddress && right->isTwoAddress) { - SDNode *DUNode = right->Node->getOperand(0).Val; - if (DUNode->isOperand(left->Node)) - RBonus += 2; - } - } - } - - if (LPriority+LBonus < RPriority+RBonus) - return true; - else if (LPriority+LBonus == RPriority+RBonus) - if (left->NumPredsLeft > right->NumPredsLeft) - return true; - else if (left->NumPredsLeft+LBonus == right->NumPredsLeft+RBonus) - if (left->CycleBound > right->CycleBound) - return true; - return false; -} - -static inline bool isCopyFromLiveIn(const SUnit *SU) { - SDNode *N = SU->Node; - return N->getOpcode() == ISD::CopyFromReg && - N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; -} - -// FIXME: This is probably too slow! -static void isReachable(SUnit *SU, SUnit *TargetSU, - std::set &Visited, bool &Reached) { - if (Reached) return; - if (SU == TargetSU) { - Reached = true; - return; - } - if (!Visited.insert(SU).second) return; - - for (std::set >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) - isReachable(I->first, TargetSU, Visited, Reached); -} - -static bool isReachable(SUnit *SU, SUnit *TargetSU) { - std::set Visited; - bool Reached = false; - isReachable(SU, TargetSU, Visited, Reached); - return Reached; -} - -static SUnit *getDefUsePredecessor(SUnit *SU) { - SDNode *DU = SU->Node->getOperand(0).Val; - for (std::set >::iterator - I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; // ignore chain preds - SUnit *PredSU = I->first; - if (PredSU->Node == DU) - return PredSU; - } - - // Must be flagged. 
- return NULL; -} - -static bool canClobber(SUnit *SU, SUnit *Op) { - if (SU->isTwoAddress) - return Op == getDefUsePredecessor(SU); - return false; -} - -/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses -/// it as a def&use operand. Add a pseudo control edge from it to the other -/// node (if it won't create a cycle) so the two-address one will be scheduled -/// first (lower in the schedule). -template -void RegReductionPriorityQueue::AddPseudoTwoAddrDeps() { - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { - SUnit *SU = (SUnit *)&((*SUnits)[i]); - SDNode *Node = SU->Node; - if (!Node->isTargetOpcode()) - continue; - - if (SU->isTwoAddress) { - unsigned Depth = SU->Node->getNodeDepth(); - SUnit *DUSU = getDefUsePredecessor(SU); - if (!DUSU) continue; - - for (std::set >::iterator I = DUSU->Succs.begin(), - E = DUSU->Succs.end(); I != E; ++I) { - SUnit *SuccSU = I->first; - if (SuccSU != SU && !canClobber(SuccSU, DUSU)) { - if (SuccSU->Node->getNodeDepth() <= Depth+2 && - !isReachable(SuccSU, SU)) { - DEBUG(std::cerr << "Adding an edge from SU # " << SU->NodeNum - << " to SU #" << SuccSU->NodeNum << "\n"); - if (SU->Preds.insert(std::make_pair(SuccSU, true)).second) - SU->NumChainPredsLeft++; - if (SuccSU->Succs.insert(std::make_pair(SU, true)).second) - SuccSU->NumChainSuccsLeft++; - } - } - } - } - } -} - -/// CalcNodePriority - Priority is the Sethi Ullman number. -/// Smaller number is the higher priority. -template -int RegReductionPriorityQueue::CalcNodePriority(const SUnit *SU) { - int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; - if (SethiUllmanNumber != 0) - return SethiUllmanNumber; - - unsigned Opc = SU->Node->getOpcode(); - if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) - SethiUllmanNumber = INT_MAX - 10; - else if (SU->NumSuccsLeft == 0) - // If SU does not have a use, i.e. it doesn't produce a value that would - // be consumed (e.g. store), then it terminates a chain of computation. 
- // Give it a small SethiUllman number so it will be scheduled right before its - // predecessors that it doesn't lengthen their live ranges. - SethiUllmanNumber = INT_MIN + 10; - else if (SU->NumPredsLeft == 0 && - (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) - SethiUllmanNumber = 1; - else { - int Extra = 0; - for (std::set >::const_iterator - I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; // ignore chain preds - SUnit *PredSU = I->first; - int PredSethiUllman = CalcNodePriority(PredSU); - if (PredSethiUllman > SethiUllmanNumber) { - SethiUllmanNumber = PredSethiUllman; - Extra = 0; - } else if (PredSethiUllman == SethiUllmanNumber && !I->second) - Extra++; - } - - SethiUllmanNumber += Extra; - } - - return SethiUllmanNumber; -} - -/// CalculatePriorities - Calculate priorities of all scheduling units. -template -void RegReductionPriorityQueue::CalculatePriorities() { - SethiUllmanNumbers.assign(SUnits->size(), 0); - - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcNodePriority(&(*SUnits)[i]); -} - //===----------------------------------------------------------------------===// // LatencyPriorityQueue Implementation //===----------------------------------------------------------------------===// @@ -1240,6 +360,17 @@ public: return V; } + // ScheduledNode - As nodes are scheduled, we look to see if there are any + // successor nodes that have a single unscheduled predecessor. If so, that + // single predecessor has a higher priority, since scheduling it will make + // the node available. + void ScheduledNode(SUnit *Node); + +private: + void CalculatePriorities(); + int CalcLatency(const SUnit &SU); + void AdjustPriorityOfUnscheduledPreds(SUnit *SU); + /// RemoveFromPriorityQueue - This is a really inefficient way to remove a /// node from a priority queue. We should roll our own heap to make this /// better or something. 
@@ -1260,17 +391,6 @@ public: for (unsigned i = 0, e = Temp.size(); i != e; ++i) Queue.push(Temp[i]); } - - // ScheduledNode - As nodes are scheduled, we look to see if there are any - // successor nodes that have a single unscheduled predecessor. If so, that - // single predecessor has a higher priority, since scheduling it will make - // the node available. - void ScheduledNode(SUnit *Node); - -private: - void CalculatePriorities(); - int CalcLatency(const SUnit &SU); - void AdjustPriorityOfUnscheduledPreds(SUnit *SU); }; } @@ -1388,19 +508,12 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { // Public Constructor Functions //===----------------------------------------------------------------------===// -llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAG &DAG, - MachineBasicBlock *BB) { - return new ScheduleDAGList(DAG, BB, DAG.getTarget(), true, - new RegReductionPriorityQueue(), - new HazardRecognizer()); -} - /// createTDListDAGScheduler - This creates a top-down list scheduler with the /// specified hazard recognizer. ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAG &DAG, MachineBasicBlock *BB, HazardRecognizer *HR) { - return new ScheduleDAGList(DAG, BB, DAG.getTarget(), false, + return new ScheduleDAGList(DAG, BB, DAG.getTarget(), new LatencyPriorityQueue(), HR); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 00000000000..acd6904ce25 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,813 @@ +//===---- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ---===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Evan Cheng and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sched" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include +#include +#include +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +namespace { + cl::opt SchedLowerDefNUse("sched-lower-defnuse", cl::Hidden); +} + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGRRList - The actual register reduction list scheduler +/// implementation. This supports both top-down and bottom-up scheduling. +/// + +class ScheduleDAGRRList : public ScheduleDAG { +private: + /// isBottomUp - This is true if the scheduling problem is bottom-up, false if + /// it is top-down. + bool isBottomUp; + + /// AvailableQueue - The priority queue to use for the available SUnits. 
+ /// + SchedulingPriorityQueue *AvailableQueue; + +public: + ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb, + const TargetMachine &tm, bool isbottomup, + SchedulingPriorityQueue *availqueue) + : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), + AvailableQueue(availqueue) { + } + + ~ScheduleDAGRRList() { + delete AvailableQueue; + } + + void Schedule(); + +private: + void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle); + void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle); + void ScheduleNodeTopDown(SUnit *SU, unsigned& CurCycle); + void ListScheduleTopDown(); + void ListScheduleBottomUp(); +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + DEBUG(std::cerr << "********** List Scheduling **********\n"); + + // Build scheduling units. + BuildSchedUnits(); + + CalculateDepths(); + CalculateHeights(); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(&DAG)); + + AvailableQueue->initNodes(SUnits); + + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 
+ if (isBottomUp) + ListScheduleBottomUp(); + else + ListScheduleTopDown(); + + AvailableQueue->releaseState(); + + DEBUG(std::cerr << "*** Final schedule ***\n"); + DEBUG(dumpSchedule()); + DEBUG(std::cerr << "\n"); + + // Emit in scheduled order + EmitSchedule(); +} + + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +static const TargetRegisterClass *getRegClass(SUnit *SU, + const TargetInstrInfo *TII, + const MRegisterInfo *MRI, + SSARegMap *RegMap) { + if (SU->Node->isTargetOpcode()) { + unsigned Opc = SU->Node->getTargetOpcode(); + const TargetInstrDescriptor &II = TII->get(Opc); + return II.OpInfo->RegClass; + } else { + assert(SU->Node->getOpcode() == ISD::CopyFromReg); + unsigned SrcReg = cast(SU->Node->getOperand(1))->getReg(); + if (MRegisterInfo::isVirtualRegister(SrcReg)) + return RegMap->getRegClass(SrcReg); + else { + for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(), + E = MRI->regclass_end(); I != E; ++I) + if ((*I)->hasType(SU->Node->getValueType(0)) && + (*I)->contains(SrcReg)) + return *I; + assert(false && "Couldn't find register class for reg copy!"); + } + return NULL; + } +} + +static unsigned getNumResults(SUnit *SU) { + unsigned NumResults = 0; + for (unsigned i = 0, e = SU->Node->getNumValues(); i != e; ++i) { + MVT::ValueType VT = SU->Node->getValueType(i); + if (VT != MVT::Other && VT != MVT::Flag) + NumResults++; + } + return NumResults; +} + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the Available queue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, + unsigned CurCycle) { + // FIXME: the distance between two nodes is not always == the predecessor's + // latency. 
For example, the reader can very well read the register written + // by the predecessor later than the issue cycle. It also depends on the + // interrupt model (drain vs. freeze). + PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); + + if (!isChain) + PredSU->NumSuccsLeft--; + else + PredSU->NumChainSuccsLeft--; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) { + std::cerr << "*** List scheduling failed! ***\n"; + PredSU->dump(&DAG); + std::cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) { + // EntryToken has to go last! Special case it here. + if (PredSU->Node->getOpcode() != ISD::EntryToken) { + PredSU->isAvailable = true; + AvailableQueue->push(PredSU); + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle) { + DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(&DAG)); + SU->Cycle = CurCycle; + + AvailableQueue->ScheduledNode(SU); + Sequence.push_back(SU); + + // Bottom up: release predecessors + for (std::set >::iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) + ReleasePred(I->first, I->second, CurCycle); + SU->isScheduled = true; + CurCycle++; +} + +/// isReady - True if node's lower cycle bound is less or equal to the current +/// scheduling cycle. Always true if all nodes have uniform latency 1. +static inline bool isReady(SUnit *SU, unsigned CurCycle) { + return SU->CycleBound <= CurCycle; +} + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + unsigned CurCycle = 0; + // Add root to Available queue. 
+ AvailableQueue->push(SUnitMap[DAG.getRoot().Val]); + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector NotReady; + SUnit *CurNode = NULL; + while (!AvailableQueue->empty()) { + SUnit *CurNode = AvailableQueue->pop(); + while (!isReady(CurNode, CurCycle)) { + NotReady.push_back(CurNode); + CurNode = AvailableQueue->pop(); + } + + // Add the nodes that aren't ready back onto the available list. + AvailableQueue->push_all(NotReady); + NotReady.clear(); + + ScheduleNodeBottomUp(CurNode, CurCycle); + } + + // Add entry node last + if (DAG.getEntryNode().Val != DAG.getRoot().Val) { + SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; + Sequence.push_back(Entry); + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. + bool AnyNotSched = false; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { + if (!AnyNotSched) + std::cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(&DAG); + std::cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + } + assert(!AnyNotSched); +#endif +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the Available queue if the count reaches zero. +void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, + unsigned CurCycle) { + // FIXME: the distance between two nodes is not always == the predecessor's + // latency. For example, the reader can very well read the register written + // by the predecessor later than the issue cycle. It also depends on the + // interrupt model (drain vs. freeze). 
+ SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency); + + if (!isChain) + SuccSU->NumPredsLeft--; + else + SuccSU->NumChainPredsLeft--; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) { + std::cerr << "*** List scheduling failed! ***\n"; + SuccSU->dump(&DAG); + std::cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) { + SuccSU->isAvailable = true; + AvailableQueue->push(SuccSU); + } +} + + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned& CurCycle) { + DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(&DAG)); + SU->Cycle = CurCycle; + + AvailableQueue->ScheduledNode(SU); + Sequence.push_back(SU); + + // Top down: release successors + for (std::set >::iterator I = SU->Succs.begin(), + E = SU->Succs.end(); I != E; ++I) + ReleaseSucc(I->first, I->second, CurCycle); + SU->isScheduled = true; + CurCycle++; +} + +void ScheduleDAGRRList::ListScheduleTopDown() { + unsigned CurCycle = 0; + SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // Emit the entry node first. + ScheduleNodeTopDown(Entry, CurCycle); + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. 
+ std::vector NotReady; + SUnit *CurNode = NULL; + while (!AvailableQueue->empty()) { + SUnit *CurNode = AvailableQueue->pop(); + while (!isReady(CurNode, CurCycle)) { + NotReady.push_back(CurNode); + CurNode = AvailableQueue->pop(); + } + + // Add the nodes that aren't ready back onto the available list. + AvailableQueue->push_all(NotReady); + NotReady.clear(); + + ScheduleNodeTopDown(CurNode, CurCycle); + } + + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. + bool AnyNotSched = false; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (!AnyNotSched) + std::cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(&DAG); + std::cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + } + assert(!AnyNotSched); +#endif +} + + + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +namespace { + template + class RegReductionPriorityQueue; + + /// Sorting functions for the Available queue. 
+ struct bu_ls_rr_sort : public std::binary_function { + RegReductionPriorityQueue *SPQ; + bu_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + struct td_ls_rr_sort : public std::binary_function { + RegReductionPriorityQueue *SPQ; + td_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace + +namespace { + template + class RegReductionPriorityQueue : public SchedulingPriorityQueue { + std::priority_queue, SF> Queue; + + public: + RegReductionPriorityQueue() : + Queue(SF(this)) {} + + virtual void initNodes(const std::vector &sunits) {} + virtual void releaseState() {} + + virtual int getSethiUllmanNumber(unsigned NodeNum) const { + return 0; + } + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + Queue.push(U); + } + void push_all(const std::vector &Nodes) { + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) + Queue.push(Nodes[i]); + } + + SUnit *pop() { + SUnit *V = Queue.top(); + Queue.pop(); + return V; + } + }; + + template + class BURegReductionPriorityQueue : public RegReductionPriorityQueue { + // SUnits - The SUnits for the current graph. + const std::vector *SUnits; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector SethiUllmanNumbers; + + public: + BURegReductionPriorityQueue() {} + + void initNodes(const std::vector &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + if (SchedLowerDefNUse) + AddPseudoTwoAddrDeps(); + // Calculate node priorities. 
+ CalculatePriorities(); + } + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + } + + int getSethiUllmanNumber(unsigned NodeNum) const { + assert(NodeNum < SethiUllmanNumbers.size()); + return SethiUllmanNumbers[NodeNum]; + } + + private: + void AddPseudoTwoAddrDeps(); + void CalculatePriorities(); + int CalcNodePriority(const SUnit *SU); + }; + + + template + class TDRegReductionPriorityQueue : public RegReductionPriorityQueue { + // SUnits - The SUnits for the current graph. + const std::vector *SUnits; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector SethiUllmanNumbers; + + public: + TDRegReductionPriorityQueue() {} + + void initNodes(const std::vector &sunits) { + SUnits = &sunits; + // Calculate node priorities. + CalculatePriorities(); + } + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + } + + int getSethiUllmanNumber(unsigned NodeNum) const { + assert(NodeNum < SethiUllmanNumbers.size()); + return SethiUllmanNumbers[NodeNum]; + } + + private: + void CalculatePriorities(); + int CalcNodePriority(const SUnit *SU); + }; +} + +// Bottom up +bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LeftNum = left->NodeNum; + unsigned RightNum = right->NodeNum; + bool LIsTarget = left->Node->isTargetOpcode(); + bool RIsTarget = right->Node->isTargetOpcode(); + int LPriority = SPQ->getSethiUllmanNumber(LeftNum); + int RPriority = SPQ->getSethiUllmanNumber(RightNum); + bool LIsFloater = LIsTarget && (LPriority == 1 || LPriority == 0); + bool RIsFloater = RIsTarget && (RPriority == 1 || RPriority == 0); + int LBonus = 0; + int RBonus = 0; + + // Schedule floaters (e.g. load from some constant address) and those nodes + // with a single predecessor each first. They maintain / reduce register + // pressure. 
+ if (LIsFloater) + LBonus += 2; + if (RIsFloater) + RBonus += 2; + + if (!SchedLowerDefNUse) { + // Special tie breaker: if two nodes share a operand, the one that use it + // as a def&use operand is preferred. + if (LIsTarget && RIsTarget) { + if (left->isTwoAddress && !right->isTwoAddress) { + SDNode *DUNode = left->Node->getOperand(0).Val; + if (DUNode->isOperand(right->Node)) + LBonus += 2; + } + if (!left->isTwoAddress && right->isTwoAddress) { + SDNode *DUNode = right->Node->getOperand(0).Val; + if (DUNode->isOperand(left->Node)) + RBonus += 2; + } + } + } + + if (LPriority+LBonus < RPriority+RBonus) + return true; + else if (LPriority+LBonus == RPriority+RBonus) + if (left->NumPredsLeft > right->NumPredsLeft) + return true; + else if (left->NumPredsLeft+LBonus == right->NumPredsLeft+RBonus) + if (left->CycleBound > right->CycleBound) + return true; + return false; +} + +static inline bool isCopyFromLiveIn(const SUnit *SU) { + SDNode *N = SU->Node; + return N->getOpcode() == ISD::CopyFromReg && + N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; +} + +// FIXME: This is probably too slow! +static void isReachable(SUnit *SU, SUnit *TargetSU, + std::set &Visited, bool &Reached) { + if (Reached) return; + if (SU == TargetSU) { + Reached = true; + return; + } + if (!Visited.insert(SU).second) return; + + for (std::set >::iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) + isReachable(I->first, TargetSU, Visited, Reached); +} + +static bool isReachable(SUnit *SU, SUnit *TargetSU) { + std::set Visited; + bool Reached = false; + isReachable(SU, TargetSU, Visited, Reached); + return Reached; +} + +static SUnit *getDefUsePredecessor(SUnit *SU) { + SDNode *DU = SU->Node->getOperand(0).Val; + for (std::set >::iterator + I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->second) continue; // ignore chain preds + SUnit *PredSU = I->first; + if (PredSU->Node == DU) + return PredSU; + } + + // Must be flagged. 
+ return NULL; +} + +static bool canClobber(SUnit *SU, SUnit *Op) { + if (SU->isTwoAddress) + return Op == getDefUsePredecessor(SU); + return false; +} + +/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses +/// it as a def&use operand. Add a pseudo control edge from it to the other +/// node (if it won't create a cycle) so the two-address one will be scheduled +/// first (lower in the schedule). +template +void BURegReductionPriorityQueue::AddPseudoTwoAddrDeps() { + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = (SUnit *)&((*SUnits)[i]); + SDNode *Node = SU->Node; + if (!Node->isTargetOpcode()) + continue; + + if (SU->isTwoAddress) { + unsigned Depth = SU->Node->getNodeDepth(); + SUnit *DUSU = getDefUsePredecessor(SU); + if (!DUSU) continue; + + for (std::set >::iterator I = DUSU->Succs.begin(), + E = DUSU->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->first; + if (SuccSU != SU && !canClobber(SuccSU, DUSU)) { + if (SuccSU->Node->getNodeDepth() <= Depth+2 && + !isReachable(SuccSU, SU)) { + DEBUG(std::cerr << "Adding an edge from SU # " << SU->NodeNum + << " to SU #" << SuccSU->NodeNum << "\n"); + if (SU->Preds.insert(std::make_pair(SuccSU, true)).second) + SU->NumChainPredsLeft++; + if (SuccSU->Succs.insert(std::make_pair(SU, true)).second) + SuccSU->NumChainSuccsLeft++; + } + } + } + } + } +} + +/// CalcNodePriority - Priority is the Sethi Ullman number. +/// Smaller number is the higher priority. +template +int BURegReductionPriorityQueue::CalcNodePriority(const SUnit *SU) { + int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Opc = SU->Node->getOpcode(); + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + SethiUllmanNumber = INT_MAX - 10; + else if (SU->NumSuccsLeft == 0) + // If SU does not have a use, i.e. it doesn't produce a value that would + // be consumed (e.g. store), then it terminates a chain of computation. 
+ // Give it a small SethiUllman number so it will be scheduled right before its + // predecessors that it doesn't lengthen their live ranges. + SethiUllmanNumber = INT_MIN + 10; + else if (SU->NumPredsLeft == 0 && + (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) + SethiUllmanNumber = 1; + else { + int Extra = 0; + for (std::set >::const_iterator + I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->second) continue; // ignore chain preds + SUnit *PredSU = I->first; + int PredSethiUllman = CalcNodePriority(PredSU); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber && !I->second) + Extra++; + } + + SethiUllmanNumber += Extra; + } + + return SethiUllmanNumber; +} + +/// CalculatePriorities - Calculate priorities of all scheduling units. +template +void BURegReductionPriorityQueue::CalculatePriorities() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodePriority(&(*SUnits)[i]); +} + +static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { + unsigned Sum = 0; + for (std::set >::const_iterator + I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->first; + for (std::set >::const_iterator + II = SuccSU->Preds.begin(), EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->first; + if (!PredSU->isScheduled) + Sum++; + } + } + + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LeftNum = left->NodeNum; + unsigned RightNum = right->NodeNum; + int LPriority = SPQ->getSethiUllmanNumber(LeftNum); + int RPriority = SPQ->getSethiUllmanNumber(RightNum); + bool LIsTarget = left->Node->isTargetOpcode(); + bool RIsTarget = right->Node->isTargetOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = 
(SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; + unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0; + + if (left->NumSuccs == 0 && right->NumSuccs != 0) + return false; + else if (left->NumSuccs != 0 && right->NumSuccs == 0) + return true; + + // Special tie breaker: if two nodes share a operand, the one that use it + // as a def&use operand is preferred. + if (LIsTarget && RIsTarget) { + if (left->isTwoAddress && !right->isTwoAddress) { + SDNode *DUNode = left->Node->getOperand(0).Val; + if (DUNode->isOperand(right->Node)) + RBonus += 2; + } + if (!left->isTwoAddress && right->isTwoAddress) { + SDNode *DUNode = right->Node->getOperand(0).Val; + if (DUNode->isOperand(left->Node)) + LBonus += 2; + } + } + if (LIsFloater) + LBonus -= 2; + if (RIsFloater) + RBonus -= 2; + if (left->NumSuccs == 1) + LBonus += 2; + if (right->NumSuccs == 1) + RBonus += 2; + + if (LPriority+LBonus < RPriority+RBonus) + return true; + else if (LPriority == RPriority) + if (left->Depth < right->Depth) + return true; + else if (left->Depth == right->Depth) + if (left->NumSuccsLeft > right->NumSuccsLeft) + return true; + else if (left->NumSuccsLeft == right->NumSuccsLeft) + if (left->CycleBound > right->CycleBound) + return true; + return false; +} + +/// CalcNodePriority - Priority is the Sethi Ullman number. +/// Smaller number is the higher priority. +template +int TDRegReductionPriorityQueue::CalcNodePriority(const SUnit *SU) { + int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Opc = SU->Node->getOpcode(); + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + SethiUllmanNumber = INT_MAX - 10; + else if (SU->NumSuccsLeft == 0) + // If SU does not have a use, i.e. it doesn't produce a value that would + // be consumed (e.g. store), then it terminates a chain of computation. 
+ // Give it a small SethiUllman number so it will be scheduled right before its + // predecessors that it doesn't lengthen their live ranges. + SethiUllmanNumber = INT_MIN + 10; + else if (SU->NumPredsLeft == 0 && + (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) + SethiUllmanNumber = 1; + else { + int Extra = 0; + for (std::set >::const_iterator + I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->second) continue; // ignore chain preds + SUnit *PredSU = I->first; + int PredSethiUllman = CalcNodePriority(PredSU); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber && !I->second) + Extra++; + } + + SethiUllmanNumber += Extra; + } + + return SethiUllmanNumber; +} + +/// CalculatePriorities - Calculate priorities of all scheduling units. +template +void TDRegReductionPriorityQueue::CalculatePriorities() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodePriority(&(*SUnits)[i]); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAG &DAG, + MachineBasicBlock *BB) { + return new ScheduleDAGRRList(DAG, BB, DAG.getTarget(), true, + new BURegReductionPriorityQueue()); +} + +llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAG &DAG, + MachineBasicBlock *BB) { + return new ScheduleDAGRRList(DAG, BB, DAG.getTarget(), false, + new TDRegReductionPriorityQueue()); +} + diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e9ac44693ad..6e24ad42050 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -58,36 +58,28 @@ ViewSchedDAGs("view-sched-dags", 
cl::Hidden, static const bool ViewISelDAGs = 0, ViewSchedDAGs = 0; #endif -// Scheduling heuristics -enum SchedHeuristics { - defaultScheduling, // Let the target specify its preference. - noScheduling, // No scheduling, emit breadth first sequence. - simpleScheduling, // Two pass, min. critical path, max. utilization. - simpleNoItinScheduling, // Same as above exact using generic latency. - listSchedulingBURR, // Bottom up reg reduction list scheduling. - listSchedulingTD // Top-down list scheduler. -}; - namespace { - cl::opt + cl::opt ISHeuristic( "sched", cl::desc("Choose scheduling style"), - cl::init(defaultScheduling), + cl::init(ScheduleDAG::defaultScheduling), cl::values( - clEnumValN(defaultScheduling, "default", + clEnumValN(ScheduleDAG::defaultScheduling, "default", "Target preferred scheduling style"), - clEnumValN(noScheduling, "none", + clEnumValN(ScheduleDAG::noScheduling, "none", "No scheduling: breadth first sequencing"), - clEnumValN(simpleScheduling, "simple", + clEnumValN(ScheduleDAG::simpleScheduling, "simple", "Simple two pass scheduling: minimize critical path " "and maximize processor utilization"), - clEnumValN(simpleNoItinScheduling, "simple-noitin", + clEnumValN(ScheduleDAG::simpleNoItinScheduling, "simple-noitin", "Simple two pass scheduling: Same as simple " "except using generic latency"), - clEnumValN(listSchedulingBURR, "list-burr", - "Bottom up register reduction list scheduling"), - clEnumValN(listSchedulingTD, "list-td", + clEnumValN(ScheduleDAG::listSchedulingBURR, "list-burr", + "Bottom-up register reduction list scheduling"), + clEnumValN(ScheduleDAG::listSchedulingTDRR, "list-tdrr", + "Top-down register reduction list scheduling"), + clEnumValN(ScheduleDAG::listSchedulingTD, "list-td", "Top-down list scheduler"), clEnumValEnd)); } // namespace @@ -3418,7 +3410,7 @@ void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) { switch (ISHeuristic) { default: assert(0 && "Unrecognized scheduling heuristic"); - case 
defaultScheduling: + case ScheduleDAG::defaultScheduling: if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) SL = createTDListDAGScheduler(DAG, BB, CreateTargetHazardRecognizer()); else { @@ -3427,19 +3419,22 @@ void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) { SL = createBURRListDAGScheduler(DAG, BB); } break; - case noScheduling: + case ScheduleDAG::noScheduling: SL = createBFS_DAGScheduler(DAG, BB); break; - case simpleScheduling: + case ScheduleDAG::simpleScheduling: SL = createSimpleDAGScheduler(false, DAG, BB); break; - case simpleNoItinScheduling: + case ScheduleDAG::simpleNoItinScheduling: SL = createSimpleDAGScheduler(true, DAG, BB); break; - case listSchedulingBURR: + case ScheduleDAG::listSchedulingBURR: SL = createBURRListDAGScheduler(DAG, BB); break; - case listSchedulingTD: + case ScheduleDAG::listSchedulingTDRR: + SL = createTDRRListDAGScheduler(DAG, BB); + break; + case ScheduleDAG::listSchedulingTD: SL = createTDListDAGScheduler(DAG, BB, CreateTargetHazardRecognizer()); break; }