From 4de099d8ca651e00fa5fac22bace4f4dba2d0292 Mon Sep 17 00:00:00 2001 From: David Goodwin Date: Tue, 3 Nov 2009 20:57:50 +0000 Subject: [PATCH] Do a scheduling pass ignoring anti-dependencies to identify candidate registers that should be renamed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85939 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/LatencyPriorityQueue.h | 21 ++- include/llvm/CodeGen/ScheduleDAG.h | 38 ++--- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 36 +++-- lib/CodeGen/AggressiveAntiDepBreaker.h | 4 + lib/CodeGen/AntiDepBreaker.h | 17 ++- lib/CodeGen/CriticalAntiDepBreaker.cpp | 3 +- lib/CodeGen/CriticalAntiDepBreaker.h | 4 + lib/CodeGen/ExactHazardRecognizer.cpp | 2 +- lib/CodeGen/LatencyPriorityQueue.cpp | 9 +- lib/CodeGen/PostRASchedulerList.cpp | 149 +++++++++++++++----- lib/CodeGen/ScheduleDAG.cpp | 16 ++- 11 files changed, 216 insertions(+), 83 deletions(-) diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index 71fae2aeabb..a7cebee6064 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -39,12 +39,21 @@ namespace llvm { /// predecessor for. This is used as a tie-breaker heuristic for better /// mobility. std::vector NumNodesSolelyBlocking; - - PriorityQueue, latency_sort> Queue; -public: - LatencyPriorityQueue() : Queue(latency_sort(this)) { - } + /// IgnoreAntiDep - Ignore anti-dependencies + bool IgnoreAntiDep; + + /// Queue - The queue. 
+ PriorityQueue, latency_sort> Queue; + +public: + LatencyPriorityQueue() : IgnoreAntiDep(false), Queue(latency_sort(this)) { + } + + void setIgnoreAntiDep(bool ignore) { + IgnoreAntiDep = ignore; + } + void initNodes(std::vector &sunits) { SUnits = &sunits; NumNodesSolelyBlocking.resize(SUnits->size(), 0); @@ -63,7 +72,7 @@ public: unsigned getLatency(unsigned NodeNum) const { assert(NodeNum < (*SUnits).size()); - return (*SUnits)[NodeNum].getHeight(); + return (*SUnits)[NodeNum].getHeight(IgnoreAntiDep); } unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index fdbbb1ee20e..d5e70203122 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -340,28 +340,34 @@ namespace llvm { void removePred(const SDep &D); /// getDepth - Return the depth of this node, which is the length of the - /// maximum path up to any node with has no predecessors. - unsigned getDepth() const { - if (!isDepthCurrent) const_cast(this)->ComputeDepth(); + /// maximum path up to any node which has no predecessors. If IgnoreAntiDep + /// is true, ignore anti-dependence edges. + unsigned getDepth(bool IgnoreAntiDep=false) const { + if (!isDepthCurrent) + const_cast(this)->ComputeDepth(IgnoreAntiDep); return Depth; } /// getHeight - Return the height of this node, which is the length of the - /// maximum path down to any node with has no successors. - unsigned getHeight() const { - if (!isHeightCurrent) const_cast(this)->ComputeHeight(); + /// maximum path down to any node which has no successors. If IgnoreAntiDep + /// is true, ignore anti-dependence edges. + unsigned getHeight(bool IgnoreAntiDep=false) const { + if (!isHeightCurrent) + const_cast(this)->ComputeHeight(IgnoreAntiDep); return Height; } - /// setDepthToAtLeast - If NewDepth is greater than this node's depth - /// value, set it to be the new depth value. 
This also recursively - /// marks successor nodes dirty. - void setDepthToAtLeast(unsigned NewDepth); + /// setDepthToAtLeast - If NewDepth is greater than this node's + /// depth value, set it to be the new depth value. This also + /// recursively marks successor nodes dirty. If IgnoreAntiDep is + /// true, ignore anti-dependence edges. + void setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep=false); - /// setDepthToAtLeast - If NewDepth is greater than this node's depth - /// value, set it to be the new height value. This also recursively - /// marks predecessor nodes dirty. - void setHeightToAtLeast(unsigned NewHeight); + /// setHeightToAtLeast - If NewHeight is greater than this node's + /// height value, set it to be the new height value. This also + /// recursively marks predecessor nodes dirty. If IgnoreAntiDep is + /// true, ignore anti-dependence edges. + void setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep=false); /// setDepthDirty - Set a flag in this node to indicate that its /// stored Depth value will require recomputation the next time @@ -394,8 +400,8 @@ namespace llvm { void print(raw_ostream &O, const ScheduleDAG *G) const; private: - void ComputeDepth(); - void ComputeHeight(); + void ComputeDepth(bool IgnoreAntiDep); + void ComputeHeight(bool IgnoreAntiDep); }; //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5506a1f5260..ffb6315427a 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -14,7 +14,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "aggressive-antidep" +#define DEBUG_TYPE "post-RA-sched" #include "AggressiveAntiDepBreaker.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -31,7 +31,7 @@ using namespace llvm; static cl::opt 
AntiDepTrials("agg-antidep-trials", cl::desc("Maximum number of anti-dependency breaking passes"), - cl::init(2), cl::Hidden); + cl::init(1), cl::Hidden); AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) : GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) { @@ -265,18 +265,24 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, } /// AntiDepPathStep - Return SUnit that SU has an anti-dependence on. -static void AntiDepPathStep(SUnit *SU, std::vector& Edges) { - SmallSet Dups; +static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs, + std::vector& Edges) { + AntiDepBreaker::AntiDepRegSet RegSet; + for (unsigned i = 0, e = Regs.size(); i < e; ++i) + RegSet.insert(Regs[i]); + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { if (P->getKind() == SDep::Anti) { unsigned Reg = P->getReg(); - if (Dups.count(Reg) == 0) { + if (RegSet.count(Reg) != 0) { Edges.push_back(&*P); - Dups.insert(Reg); + RegSet.erase(Reg); } } } + + assert(RegSet.empty() && "Expected all antidep registers to be found"); } void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, @@ -593,6 +599,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( /// unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::vector& SUnits, + CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex) { @@ -601,9 +608,15 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::multimap& RegRefs = State->GetRegRefs(); + // Nothing to do if no candidates. + if (Candidates.empty()) { + DEBUG(errs() << "\n===== No anti-dependency candidates\n"); + return 0; + } + // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. - if (SUnits.empty()) return false; + if (SUnits.empty()) return 0; // Manage saved state to enable multiple passes... 
if (AntiDepTrials > 1) { @@ -618,7 +631,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // ...need a map from MI to SUnit. std::map MISUnitMap; - DEBUG(errs() << "Breaking all anti-dependencies\n"); + DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() << + " anti-dependencies\n"); for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; MISUnitMap.insert(std::pair(SU->getInstr(), SU)); @@ -655,8 +669,10 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::vector Edges; SUnit *PathSU = MISUnitMap[MI]; - if (PathSU) - AntiDepPathStep(PathSU, Edges); + AntiDepBreaker::CandidateMap::iterator + citer = Candidates.find(PathSU); + if (citer != Candidates.end()) + AntiDepPathStep(PathSU, citer->second, Edges); // Ignore KILL instructions (they form a group in ScanInstruction // but don't cause any anti-dependence breaking themselves) diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 720f39080f2..5d9b40b00cc 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -131,6 +131,9 @@ namespace llvm { /// dependencies may be exposed, so multiple passes are required. unsigned GetMaxTrials(); + /// NeedCandidates - Candidates required. + bool NeedCandidates() { return true; } + /// Start - Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB); @@ -138,6 +141,7 @@ namespace llvm { /// of the ScheduleDAG and break them by renaming registers. 
/// unsigned BreakAntiDependencies(std::vector& SUnits, + CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex); diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index dac700076a8..277508767e1 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -21,6 +21,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" namespace llvm { @@ -29,12 +31,20 @@ namespace llvm { /// anti-dependencies. class AntiDepBreaker { public: + typedef SmallSet AntiDepRegSet; + typedef SmallVector AntiDepRegVector; + typedef std::map CandidateMap; + virtual ~AntiDepBreaker(); /// GetMaxTrials - Return the maximum number of anti-dependence /// breaking attempts that will be made for a block. virtual unsigned GetMaxTrials() =0; + /// NeedCandidates - Return true if the schedule must provide + /// candidates with BreakAntiDependencies(). + virtual bool NeedCandidates() =0; + /// Start - Initialize anti-dep breaking for a new basic block. virtual void StartBlock(MachineBasicBlock *BB) =0; @@ -43,9 +53,10 @@ public: /// the number of anti-dependencies broken. /// virtual unsigned BreakAntiDependencies(std::vector& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, - unsigned InsertPosIndex) =0; + CandidateMap& Candidates, + MachineBasicBlock::iterator& Begin, + MachineBasicBlock::iterator& End, + unsigned InsertPosIndex) =0; /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. 
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index ceb48d78457..984e0135b8c 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "critical-antidep" +#define DEBUG_TYPE "post-RA-sched" #include "CriticalAntiDepBreaker.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -316,6 +316,7 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg, unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(std::vector& SUnits, + CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex) { diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index ea7c73ce36e..5664d852fdf 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -68,6 +68,9 @@ namespace llvm { /// only a single pass unsigned GetMaxTrials() { return 1; } + /// NeedCandidates - Candidates not needed. + bool NeedCandidates() { return false; } + /// Start - Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB); @@ -75,6 +78,7 @@ namespace llvm { /// of the ScheduleDAG and break them by renaming registers. 
/// unsigned BreakAntiDependencies(std::vector& SUnits, + CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex); diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index f35d196061e..36925b1ff37 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "exact-hazards" +#define DEBUG_TYPE "post-RA-sched" #include "ExactHazardRecognizer.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 2e7b89c494f..794ecf7bd19 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -55,6 +55,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = 0; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; SUnit &Pred = *I->getSUnit(); if (!Pred.isScheduled) { // We found an available, but not scheduled, predecessor. If it's the @@ -73,9 +74,11 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { // this node is the sole unscheduled node for. unsigned NumNodesBlocking = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) + I != E; ++I) { + if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; if (getSingleUnscheduledPred(I->getSUnit()) == SU) ++NumNodesBlocking; + } NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; Queue.push(SU); @@ -88,8 +91,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { // the node available. 
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) + I != E; ++I) { + if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; AdjustPriorityOfUnscheduledPreds(I->getSUnit()); + } } /// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 7e85c48e13d..d5edb36b44b 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -175,10 +175,11 @@ namespace { void FixupKills(MachineBasicBlock *MBB); private: - void ReleaseSucc(SUnit *SU, SDep *SuccEdge); - void ReleaseSuccessors(SUnit *SU); - void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); - void ListScheduleTopDown(); + void ReleaseSucc(SUnit *SU, SDep *SuccEdge, bool IgnoreAntiDep); + void ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle, bool IgnoreAntiDep); + void ListScheduleTopDown( + AntiDepBreaker::CandidateMap *AntiDepCandidates); void StartBlockForKills(MachineBasicBlock *BB); // ToggleKillFlag - Toggle a register operand kill flag. Other @@ -320,15 +321,32 @@ void SchedulePostRATDList::Schedule() { BuildSchedGraph(AA); if (AntiDepBreak != NULL) { + AntiDepBreaker::CandidateMap AntiDepCandidates; + const bool NeedCandidates = AntiDepBreak->NeedCandidates(); + for (unsigned i = 0, Trials = AntiDepBreak->GetMaxTrials(); i < Trials; ++i) { - DEBUG(errs() << "********** Break Anti-Deps, Trial " << + DEBUG(errs() << "\n********** Break Anti-Deps, Trial " << i << " **********\n"); + + // If candidates are required, then schedule forward ignoring + // anti-dependencies to collect the candidate operands for + // anti-dependence breaking. 
The candidates will be the def + // operands for the anti-dependencies that if broken would allow + // an improved schedule + if (NeedCandidates) { + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + AntiDepCandidates.clear(); + AvailableQueue.initNodes(SUnits); + ListScheduleTopDown(&AntiDepCandidates); + AvailableQueue.releaseState(); + } + unsigned Broken = - AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, - InsertPosIndex); - if (Broken == 0) - break; + AntiDepBreak->BreakAntiDependencies(SUnits, AntiDepCandidates, + Begin, InsertPos, InsertPosIndex); // We made changes. Update the dependency graph. // Theoretically we could update the graph in place: @@ -336,24 +354,26 @@ void SchedulePostRATDList::Schedule() { // the def's anti-dependence *and* output-dependence edges due to // that register, and add new anti-dependence and output-dependence // edges based on the next live range of the register. - SUnits.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - BuildSchedGraph(AA); + if ((Broken != 0) || NeedCandidates) { + SUnits.clear(); + Sequence.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); + BuildSchedGraph(AA); + } NumFixedAnti += Broken; + if (Broken == 0) + break; } } DEBUG(errs() << "********** List Scheduling **********\n"); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); AvailableQueue.initNodes(SUnits); - - ListScheduleTopDown(); - + ListScheduleTopDown(NULL); AvailableQueue.releaseState(); } @@ -552,7 +572,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. 
-void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { +void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge, + bool IgnoreAntiDep) { SUnit *SuccSU = SuccEdge->getSUnit(); #ifndef NDEBUG @@ -568,7 +589,8 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { // Compute how many cycles it will be before this actually becomes // available. This is the max of the start time of all predecessors plus // their latencies. - SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); + SuccSU->setDepthToAtLeast(SU->getDepth(IgnoreAntiDep) + + SuccEdge->getLatency(), IgnoreAntiDep); // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. @@ -577,40 +599,73 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { } /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. -void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { +void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) - ReleaseSucc(SU, &*I); + I != E; ++I) { + if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + ReleaseSucc(SU, &*I, IgnoreAntiDep); + } } /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
-void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle, + bool IgnoreAntiDep) { DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); - assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle); + assert(CurCycle >= SU->getDepth(IgnoreAntiDep) && + "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle, IgnoreAntiDep); - ReleaseSuccessors(SU); + ReleaseSuccessors(SU, IgnoreAntiDep); SU->isScheduled = true; AvailableQueue.ScheduledNode(SU); } /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. -void SchedulePostRATDList::ListScheduleTopDown() { +void SchedulePostRATDList::ListScheduleTopDown( + AntiDepBreaker::CandidateMap *AntiDepCandidates) { unsigned CurCycle = 0; + const bool IgnoreAntiDep = (AntiDepCandidates != NULL); + + // We're scheduling top-down but we're visiting the regions in + // bottom-up order, so we don't know the hazards at the start of a + // region. So assume no hazards (this should usually be ok as most + // blocks are a single region). + HazardRec->Reset(); + + // If ignoring anti-dependencies, the Schedule DAG still has Anti + // dep edges, but we ignore them for scheduling purposes + AvailableQueue.setIgnoreAntiDep(IgnoreAntiDep); // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); + ReleaseSuccessors(&EntrySU, IgnoreAntiDep); - // All leaves to Available queue. + // Add all leaves to Available queue. If ignoring antideps we also + // adjust the predecessor count for each node to not include antidep + // edges. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. 
- if (SUnits[i].Preds.empty()) { + bool available = SUnits[i].Preds.empty(); + // If we are ignoring anti-dependencies then a node that has only + // anti-dep predecessors is available. + if (!available && IgnoreAntiDep) { + available = true; + for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(), + E = SUnits[i].Preds.end(); I != E; ++I) { + if (I->getKind() != SDep::Anti) { + available = false; + } else { + SUnits[i].NumPredsLeft -= 1; + } + } + } + + if (available) { AvailableQueue.push(&SUnits[i]); SUnits[i].isAvailable = true; } @@ -629,26 +684,25 @@ void SchedulePostRATDList::ListScheduleTopDown() { // so, add them to the available queue. unsigned MinDepth = ~0u; for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->getDepth() <= CurCycle) { + if (PendingQueue[i]->getDepth(IgnoreAntiDep) <= CurCycle) { AvailableQueue.push(PendingQueue[i]); PendingQueue[i]->isAvailable = true; PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else if (PendingQueue[i]->getDepth() < MinDepth) - MinDepth = PendingQueue[i]->getDepth(); + } else if (PendingQueue[i]->getDepth(IgnoreAntiDep) < MinDepth) + MinDepth = PendingQueue[i]->getDepth(IgnoreAntiDep); } DEBUG(errs() << "\n*** Examining Available\n"; LatencyPriorityQueue q = AvailableQueue; while (!q.empty()) { SUnit *su = q.pop(); - errs() << "Height " << su->getHeight() << ": "; + errs() << "Height " << su->getHeight(IgnoreAntiDep) << ": "; su->dump(this); }); SUnit *FoundSUnit = 0; - bool HasNoopHazards = false; while (!AvailableQueue.empty()) { SUnit *CurSUnit = AvailableQueue.pop(); @@ -672,9 +726,30 @@ void SchedulePostRATDList::ListScheduleTopDown() { NotReady.clear(); } - // If we found a node to schedule, do it now. + // If we found a node to schedule... 
if (FoundSUnit) { - ScheduleNodeTopDown(FoundSUnit, CurCycle); + // If we are ignoring anti-dependencies and the SUnit we are + // scheduling has an antidep predecessor that has not been + // scheduled, then we will need to break that antidep if we want + // to get this schedule when not ignoring anti-dependencies. + if (IgnoreAntiDep) { + AntiDepBreaker::AntiDepRegVector AntiDepRegs; + for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(), + E = FoundSUnit->Preds.end(); I != E; ++I) { + if ((I->getKind() == SDep::Anti) && !I->getSUnit()->isScheduled) + AntiDepRegs.push_back(I->getReg()); + } + + if (AntiDepRegs.size() > 0) { + DEBUG(errs() << "*** AntiDep Candidate: "); + DEBUG(FoundSUnit->dump(this)); + AntiDepCandidates->insert( + AntiDepBreaker::CandidateMap::value_type(FoundSUnit, AntiDepRegs)); + } + } + + // ... schedule the node... + ScheduleNodeTopDown(FoundSUnit, CurCycle, IgnoreAntiDep); HazardRec->EmitInstruction(FoundSUnit); CycleHasInsts = true; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 5a59862090b..1363a92fed6 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -183,8 +183,8 @@ void SUnit::setHeightDirty() { /// setDepthToAtLeast - Update this node's successors to reflect the /// fact that this node's depth just increased. /// -void SUnit::setDepthToAtLeast(unsigned NewDepth) { - if (NewDepth <= getDepth()) +void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) { + if (NewDepth <= getDepth(IgnoreAntiDep)) return; setDepthDirty(); Depth = NewDepth; @@ -194,8 +194,8 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth) { /// setHeightToAtLeast - Update this node's predecessors to reflect the /// fact that this node's height just increased. 
/// -void SUnit::setHeightToAtLeast(unsigned NewHeight) { - if (NewHeight <= getHeight()) +void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) { + if (NewHeight <= getHeight(IgnoreAntiDep)) return; setHeightDirty(); Height = NewHeight; @@ -204,7 +204,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight) { /// ComputeDepth - Calculate the maximal path from the node to the exit. /// -void SUnit::ComputeDepth() { +void SUnit::ComputeDepth(bool IgnoreAntiDep) { SmallVector WorkList; WorkList.push_back(this); do { @@ -214,6 +214,7 @@ void SUnit::ComputeDepth() { unsigned MaxPredDepth = 0; for (SUnit::const_pred_iterator I = Cur->Preds.begin(), E = Cur->Preds.end(); I != E; ++I) { + if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; SUnit *PredSU = I->getSUnit(); if (PredSU->isDepthCurrent) MaxPredDepth = std::max(MaxPredDepth, @@ -237,7 +238,7 @@ void SUnit::ComputeDepth() { /// ComputeHeight - Calculate the maximal path from the node to the entry. /// -void SUnit::ComputeHeight() { +void SUnit::ComputeHeight(bool IgnoreAntiDep) { SmallVector WorkList; WorkList.push_back(this); do { @@ -247,6 +248,7 @@ void SUnit::ComputeHeight() { unsigned MaxSuccHeight = 0; for (SUnit::const_succ_iterator I = Cur->Succs.begin(), E = Cur->Succs.end(); I != E; ++I) { + if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; SUnit *SuccSU = I->getSUnit(); if (SuccSU->isHeightCurrent) MaxSuccHeight = std::max(MaxSuccHeight, @@ -346,7 +348,7 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { AnyNotSched = true; } if (SUnits[i].isScheduled && - (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) > + (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) > unsigned(INT_MAX)) { if (!AnyNotSched) errs() << "*** Scheduling failed! ***\n";