diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 10d1adf799c..addfccbd002 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -143,6 +143,12 @@ private: std::vector LiveRegDefs; std::vector LiveRegGens; + // Interferences lists the SUnits currently delayed by live physical registers. + // LRegsMap records, for each such SUnit, the registers it interferes with. + SmallVector Interferences; + typedef DenseMap > LRegsMapT; + LRegsMapT LRegsMap; + /// Topo - A topological ordering for SUnits which permits fast IsReachable /// and similar queries. ScheduleDAGTopologicalSort Topo; @@ -226,6 +232,8 @@ private: SmallVector&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + void releaseInterferences(unsigned Reg = 0); + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); @@ -322,6 +330,7 @@ void ScheduleDAGRRList::Schedule() { LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); CallSeqEndForStart.clear(); + assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); // Build the scheduling graph. 
BuildSchedGraph(NULL); @@ -735,6 +744,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } // Release the special call resource dependence, if this is the beginning @@ -749,6 +759,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -804,6 +815,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } @@ -831,6 +843,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -1315,34 +1328,58 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { return !LRegs.empty(); } +void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { + // Drop every interference whose register list contains Reg (all of them when Reg is 0), clear its pending flag, and repush it if it is available and not already queued. + for (unsigned i = Interferences.size(); i > 0; --i) { + SUnit *SU = Interferences[i-1]; + LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); + if (Reg) { + SmallVector &LRegs = LRegsPos->second; + if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) + continue; + } + SU->isPending = false; + // The interfering node may no longer be available due to backtracking. + // Furthermore, it may have been made available again, in which case it is + // now already in the AvailableQueue. + if (SU->isAvailable && !SU->NodeQueueId) { + DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + AvailableQueue->push(SU); + } + if (i < Interferences.size()) + Interferences[i-1] = Interferences.back(); + Interferences.pop_back(); + LRegsMap.erase(LRegsPos); + } +} + /// Return a node that can be scheduled in this cycle. 
Requirements: /// (1) Ready: latency has been satisfied /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { - SmallVector Interferences; - DenseMap > LRegsMap; - - SUnit *CurSU = AvailableQueue->pop(); + SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); while (CurSU) { SmallVector LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - Interferences.push_back(CurSU); + DEBUG(dbgs() << " Interfering reg " << (LRegs[0] == TRI->getNumRegs() ? "CallResource" : TRI->getName(LRegs[0])) + << " SU #" << CurSU->NodeNum << '\n'); + std::pair LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Interferences are pending"); + // Already recorded as an interference: refresh it with the current live regs. + LRegsPair.first->second = LRegs; + } CurSU = AvailableQueue->pop(); } - if (CurSU) { - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - assert(Interferences[i]->isAvailable && "must still be available"); - AvailableQueue->push(Interferences[i]); - } + if (CurSU) return CurSU; - } // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies @@ -1363,6 +1400,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { } } if (!WillCreateCycle(TrySU, BtSU)) { + // BacktrackBottomUp mutates Interferences! 
BacktrackBottomUp(TrySU, BtSU); // Force the current node to be scheduled before the node that @@ -1372,19 +1410,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } + DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" + << TrySU->NodeNum << ")\n"); AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current - // node is no longer avaialable. Schedule a successor that's now - // available instead. - if (!TrySU->isAvailable) { + // node is no longer available. Only remove it from the queue if it is actually queued. + if (!TrySU->isAvailable || !TrySU->NodeQueueId) CurSU = AvailableQueue->pop(); - } else { + AvailableQueue->remove(TrySU); CurSU = TrySU; - TrySU->isPending = false; - Interferences.erase(Interferences.begin()+i); } + // Interferences has been mutated. We must break. break; } } @@ -1435,17 +1473,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { TrySU->isAvailable = false; CurSU = NewDef; } - assert(CurSU && "Unable to resolve live physical register dependencies!"); - - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - // May no longer be available due to backtracking. - if (Interferences[i]->isAvailable) { - AvailableQueue->push(Interferences[i]); - } - } return CurSU; } @@ -1466,7 +1494,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. 
Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { + while (!AvailableQueue->empty() || !Interferences.empty()) { DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll new file mode 100644 index 00000000000..b792ffa09fb --- /dev/null +++ b/test/CodeGen/X86/pre-ra-sched.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \ +; RUN: 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; rdar:13279013: pre-RA-sched should not check all interferences and +; repush them on the ready queue after scheduling each instruction. +; Expect exactly three repushes between the EFLAGS interference and the final schedule. +; CHECK: *** List Scheduling +; CHECK: Interfering reg EFLAGS +; CHECK: Repushing +; CHECK: Repushing +; CHECK: Repushing +; CHECK-NOT: Repushing +; CHECK: *** Final schedule +define i32 @test(i8* %pin) #0 { + %g0 = getelementptr inbounds i8* %pin, i64 0 + %l0 = load i8* %g0, align 1 + + %g1a = getelementptr inbounds i8* %pin, i64 1 + %l1a = load i8* %g1a, align 1 + %z1a = zext i8 %l1a to i32 + %g1b = getelementptr inbounds i8* %pin, i64 2 + %l1b = load i8* %g1b, align 1 + %z1b = zext i8 %l1b to i32 + %c1 = icmp ne i8 %l0, 0 + %x1 = xor i32 %z1a, %z1b + %s1 = select i1 %c1, i32 %z1a, i32 %x1 + + %g2a = getelementptr inbounds i8* %pin, i64 3 + %l2a = load i8* %g2a, align 1 + %z2a = zext i8 %l2a to i32 + %g2b = getelementptr inbounds i8* %pin, i64 4 + %l2b = load i8* %g2b, align 1 + %z2b = zext i8 %l2b to i32 + %x2 = xor i32 %z2a, %z2b + %s2 = select i1 %c1, i32 %z2a, i32 %x2 + + %g3a = getelementptr inbounds i8* %pin, i64 5 + %l3a = load i8* %g3a, align 1 + %z3a = zext i8 %l3a to i32 + %g3b = getelementptr inbounds i8* %pin, i64 6 + %l3b = load i8* %g3b, align 1 + %z3b = zext i8 %l3b to i32 + %x3 = xor i32 %z3a, %z3b + %s3 = select i1 %c1, i32 %z3a, i32 %x3 + + %c3 = icmp ne i8 %l1a, 0 + %c4 = icmp ne i8 %l2a, 0 + + %s4 = select i1 %c3, i32 %s1, i32 %s2 + %s5 = select i1 %c4, i32 %s4, i32 %s3 + + ret 
i32 %s5 +} + +attributes #0 = { nounwind ssp uwtable }