From 21ca0b3ea45549f6f16c5b2d0e96ad49256baa1d Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Fri, 17 May 2013 16:50:44 +0000 Subject: [PATCH] R600: Use depth first scheduling algorithm It should increase PV substitution opportunities and lower gpr usage (pending computations path are "flushed" sooner) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182128 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600MachineScheduler.cpp | 78 ++++++++---------------- lib/Target/R600/R600MachineScheduler.h | 32 ++-------- test/CodeGen/R600/bfi_int.ll | 2 +- test/CodeGen/R600/pv.ll | 2 +- 4 files changed, 33 insertions(+), 81 deletions(-) diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 5bf1e33f401..aeb2674f4e6 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -21,7 +21,6 @@ #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -31,9 +30,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { TII = static_cast(DAG->TII); TRI = static_cast(DAG->TRI); MRI = &DAG->MRI; - Available[IDAlu]->clear(); - Available[IDFetch]->clear(); - Available[IDOther]->clear(); CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 15; @@ -44,16 +40,11 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { InstKindLimit[IDFetch] = ST.getTexVTXClauseSize(); } -void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) +void R600SchedStrategy::MoveUnits(std::vector &QSrc, + std::vector &QDst) { - if (QSrc->empty()) - return; - for (ReadyQueue::iterator I = QSrc->begin(), - E = QSrc->end(); I != E; ++I) { - (*I)->NodeQueueId &= ~QSrc->getID(); - QDst->push(*I); - } - QSrc->clear(); + QDst.insert(QDst.end(), QSrc.begin(), QSrc.end()); + QSrc.clear(); } SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { @@ -64,9 +55,9 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { // check if we might want to switch current clause type bool AllowSwitchToAlu = (CurInstKind == IDOther) || (CurEmitted >= InstKindLimit[CurInstKind]) || - (Available[CurInstKind]->empty()); + (Available[CurInstKind].empty()); bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) && - (!Available[IDFetch]->empty() || !Available[IDOther]->empty()); + (!Available[IDFetch].empty() || !Available[IDOther].empty()); if ((AllowSwitchToAlu && CurInstKind != IDAlu) || (!AllowSwitchFromAlu && CurInstKind == IDAlu)) { @@ -99,10 +90,6 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { SU->dump(DAG); } else { dbgs() << "NO NODE "; - for (int i = 0; i < IDLast; ++i) { - Available[i]->dump(); - Pending[i]->dump(); - } for (unsigned i = 0; i < DAG->SUnits.size(); i++) { const SUnit &S = DAG->SUnits[i]; if (!S.isScheduled) @@ -163,7 +150,7 @@ void R600SchedStrategy::releaseTopNode(SUnit *SU) { DEBUG(dbgs() << IK << " <= "); DEBUG(SU->dump(DAG)); - Pending[IK]->push(SU); + Pending[IK].push_back(SU); } void R600SchedStrategy::releaseBottomNode(SUnit *SU) { @@ -263,16 +250,16 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -SUnit *R600SchedStrategy::PopInst(std::multiset &Q) { +SUnit *R600SchedStrategy::PopInst(std::vector &Q) { if (Q.empty()) return NULL; - for (std::set::iterator It = Q.begin(), E = Q.end(); + for (std::vector::reverse_iterator It = Q.rbegin(), E = Q.rend(); It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); if (TII->canBundle(InstructionsGroupCandidate)) { InstructionsGroupCandidate.pop_back(); - Q.erase(It); + Q.erase((It + 1).base()); return SU; } else { InstructionsGroupCandidate.pop_back(); @@ -282,14 +269,12 @@ SUnit *R600SchedStrategy::PopInst(std::multiset &Q) { } void R600SchedStrategy::LoadAlu() { - ReadyQueue *QSrc = Pending[IDAlu]; - for (ReadyQueue::iterator I = QSrc->begin(), - E = QSrc->end(); I != E; ++I) { - (*I)->NodeQueueId &= ~QSrc->getID(); - AluKind AK = getAluKind(*I); - AvailableAlus[AK].insert(*I); - } - QSrc->clear(); + std::vector &QSrc = Pending[IDAlu]; + for (unsigned i = 0, e = QSrc.size(); i < e; ++i) { + AluKind AK = getAluKind(QSrc[i]); + AvailableAlus[AK].push_back(QSrc[i]); + } + QSrc.clear(); } void R600SchedStrategy::PrepareNextSlot() { @@ -331,27 +316,16 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); - SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); - if (!UnslotedSU) { + if (SlotedSU) return SlotedSU; - } else if (!SlotedSU) { + SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); + if (UnslotedSU) AssignSlot(UnslotedSU->getInstr(), Slot); - return UnslotedSU; - } else { - //Determine which one to pick (the lesser one) - if (CompareSUnit()(SlotedSU, UnslotedSU)) { - AvailableAlus[AluAny].insert(UnslotedSU); - return SlotedSU; - } else { - AvailableAlus[IndexToID[Slot]].insert(SlotedSU); - AssignSlot(UnslotedSU->getInstr(), Slot); - return UnslotedSU; - } - } + return UnslotedSU; } bool R600SchedStrategy::isAvailablesAluEmpty() const { - return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() && + return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() && AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() && AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() && AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty(); @@ -389,14 +363,14 @@ SUnit* R600SchedStrategy::pickAlu() { SUnit* R600SchedStrategy::pickOther(int QID) { SUnit *SU = 0; - ReadyQueue *AQ = Available[QID]; + std::vector &AQ = Available[QID]; - if (AQ->empty()) { + if (AQ.empty()) { MoveUnits(Pending[QID], AQ); } - if (!AQ->empty()) { - SU = *AQ->begin(); - AQ->remove(AQ->begin()); + if (!AQ.empty()) { + SU = AQ.back(); + AQ.resize(AQ.size() - 1); } return SU; } diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index 3d0367fd8eb..c82ee49c78b 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -24,13 +24,6 @@ using namespace llvm; namespace llvm { -class CompareSUnit { -public: - bool operator()(const SUnit *S1, const SUnit *S2) { - return S1->getDepth() > S2->getDepth(); - } -}; - class R600SchedStrategy : public MachineSchedStrategy { const ScheduleDAGMI *DAG; @@ -38,12 +31,6 @@ class R600SchedStrategy : public MachineSchedStrategy { const R600RegisterInfo *TRI; MachineRegisterInfo *MRI; - enum InstQueue { - QAlu = 1, - QFetch = 2, - QOther = 4 - }; - enum InstKind { IDAlu, IDFetch, @@ -62,8 +49,9 @@ class R600SchedStrategy : public MachineSchedStrategy { AluLast }; - ReadyQueue *Available[IDLast], *Pending[IDLast]; - std::multiset AvailableAlus[AluLast]; + std::vector Available[IDLast], Pending[IDLast]; + std::vector AvailableAlus[AluLast]; + std::vector FakeCopy; InstKind CurInstKind; int CurEmitted; @@ -76,19 +64,9 @@ class R600SchedStrategy : public MachineSchedStrategy { public: R600SchedStrategy() : DAG(0), TII(0), TRI(0), MRI(0) { - Available[IDAlu] = new ReadyQueue(QAlu, "AAlu"); - Available[IDFetch] = new ReadyQueue(QFetch, "AFetch"); - Available[IDOther] = new ReadyQueue(QOther, "AOther"); - Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu"); - Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch"); - Pending[IDOther] = new ReadyQueue(QOther<<4, "POther"); } virtual ~R600SchedStrategy() { - for (unsigned I = 0; I < IDLast; ++I) { - delete Available[I]; - delete Pending[I]; - } } virtual void initialize(ScheduleDAGMI *dag); @@ -107,12 +85,12 @@ private: bool isAvailablesAluEmpty() const; SUnit *AttemptFillSlot (unsigned Slot); void PrepareNextSlot(); - SUnit *PopInst(std::multiset &Q); + SUnit *PopInst(std::vector &Q); void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); SUnit* pickOther(int QID); - void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); + void MoveUnits(std::vector &QSrc, std::vector &QDst); }; } // namespace llvm diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll index 4244dcf3c77..f51060ffbed 100644 --- a/test/CodeGen/R600/bfi_int.ll +++ b/test/CodeGen/R600/bfi_int.ll @@ -37,7 +37,7 @@ entry: ; ((x & z) | (y & (x | z))) ; R600-CHECK: @bfi_sha256_ma ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV.x}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[xyzw]}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; SI-CHECK: V_XOR_B32_e32 [[DST:VGPR[0-9]+]], {{VGPR[0-9]+, VGPR[0-9]+}} ; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{VGPR[0-9]+, VGPR[0-9]+}} diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index 062b7412996..37c3d9d7d6d 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 | FileCheck %s ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) -;CHECK: CNDGE * T{{[0-9].[XYZW]}}, PV.x +;CHECK-NEXT: CNDGE T{{[0-9].[XYZW]}}, PV.x define void @main() #0 { main_body: