From 4c8c83022b501759d8559e224c84ae2a9921ba41 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 21 Jun 2008 15:52:51 +0000 Subject: [PATCH] Change ScheduleDAG's SUnitMap from DenseMap > to DenseMap, and adjust the way cloned SUnit nodes are handled so that only the original node needs to be in the map. This speeds up llc on 447.dealII.llvm.bc by about 2%. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52576 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ScheduleDAG.h | 16 ++++---- lib/CodeGen/SelectionDAG/ScheduleDAG.cpp | 35 +++++++++------- lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 3 +- .../SelectionDAG/ScheduleDAGRRList.cpp | 40 +++++++++++-------- .../SelectionDAG/SelectionDAGPrinter.cpp | 2 +- 5 files changed, 54 insertions(+), 42 deletions(-) diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 218a3377d87..18aa97a9694 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -95,8 +95,8 @@ namespace llvm { struct SUnit { SDNode *Node; // Representative node. SmallVector FlaggedNodes;// All nodes flagged to Node. - unsigned InstanceNo; // Instance#. One SDNode can be multiple - // SUnit due to cloning. + SUnit *OrigNode; // If not this, the node from which + // this node was cloned. // Preds/Succs - The SUnits before/after us in the graph. The boolean value // is true if the edge is a token chain edge, false if it is a value edge. @@ -129,7 +129,7 @@ namespace llvm { const TargetRegisterClass *CopySrcRC; SUnit(SDNode *node, unsigned nodenum) - : Node(node), InstanceNo(0), NodeNum(nodenum), NodeQueueId(0), Latency(0), + : Node(node), OrigNode(0), NodeNum(nodenum), NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), isPending(false), isAvailable(false), isScheduled(false), @@ -215,7 +215,7 @@ namespace llvm { public: virtual ~SchedulingPriorityQueue() {} - virtual void initNodes(DenseMap > &SUMap, + virtual void initNodes(DenseMap &SUMap, std::vector &SUnits) = 0; virtual void addNode(const SUnit *SU) = 0; virtual void updateNode(const SUnit *SU) = 0; @@ -251,8 +251,7 @@ namespace llvm { MachineConstantPool *ConstPool; // Target constant pool std::vector Sequence; // The schedule. Null SUnit*'s // represent noop instructions. - DenseMap > SUnitMap; - // SDNode to SUnit mapping (n -> n). + DenseMap SUnitMap; // SDNode to SUnit mapping (n -> n). std::vector SUnits; // The scheduling units. SmallSet CommuteSet; // Nodes that should be commuted. @@ -291,6 +290,7 @@ namespace llvm { /// SUnit *NewSUnit(SDNode *N) { SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); + SUnits.back().OrigNode = &SUnits.back(); return &SUnits.back(); } @@ -331,7 +331,7 @@ namespace llvm { /// VRBaseMap contains, for each already emitted node, the first virtual /// register number for the results of the node. /// - void EmitNode(SDNode *Node, unsigned InstNo, + void EmitNode(SDNode *Node, bool IsClone, DenseMap &VRBaseMap); /// EmitNoop - Emit a noop instruction. @@ -370,7 +370,7 @@ namespace llvm { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. - void EmitCopyFromReg(SDNode *Node, unsigned ResNo, unsigned InstNo, + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, unsigned SrcReg, DenseMap &VRBaseMap); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp index d3cc11693e4..d61a0982c32 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp @@ -79,13 +79,12 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *Use, unsigned Op, SUnit *ScheduleDAG::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->Node); + SU->OrigNode = Old->OrigNode; SU->FlaggedNodes = Old->FlaggedNodes; - SU->InstanceNo = SUnitMap[Old->Node].size(); SU->Latency = Old->Latency; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; - SUnitMap[Old->Node].push_back(SU); return SU; } @@ -105,7 +104,7 @@ void ScheduleDAG::BuildSchedUnits() { continue; // If this node has already been processed, stop now. - if (!SUnitMap[NI].empty()) continue; + if (SUnitMap.count(NI)) continue; SUnit *NodeSUnit = NewSUnit(NI); @@ -120,7 +119,9 @@ void ScheduleDAG::BuildSchedUnits() { do { N = N->getOperand(N->getNumOperands()-1).Val; NodeSUnit->FlaggedNodes.push_back(N); - SUnitMap[N].push_back(NodeSUnit); + bool isNew = SUnitMap.insert(std::make_pair(N, NodeSUnit)); + isNew = isNew; + assert(isNew && "Node already inserted!"); } while (N->getNumOperands() && N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag); std::reverse(NodeSUnit->FlaggedNodes.begin(), @@ -140,7 +141,9 @@ void ScheduleDAG::BuildSchedUnits() { if (FlagVal.isOperandOf(UI->getUser())) { HasFlagUse = true; NodeSUnit->FlaggedNodes.push_back(N); - SUnitMap[N].push_back(NodeSUnit); + bool isNew = SUnitMap.insert(std::make_pair(N, NodeSUnit)); + isNew = isNew; + assert(isNew && "Node already inserted!"); N = UI->getUser(); break; } @@ -150,7 +153,9 @@ void ScheduleDAG::BuildSchedUnits() { // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node. // Update the SUnit NodeSUnit->Node = N; - SUnitMap[N].push_back(NodeSUnit); + bool isNew = SUnitMap.insert(std::make_pair(N, NodeSUnit)); + isNew = isNew; + assert(isNew && "Node already inserted!"); ComputeLatency(NodeSUnit); } @@ -187,7 +192,7 @@ void ScheduleDAG::BuildSchedUnits() { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).Val; if (isPassiveNode(OpN)) continue; // Not scheduled. - SUnit *OpSU = SUnitMap[OpN].front(); + SUnit *OpSU = SUnitMap[OpN]; assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. @@ -399,12 +404,12 @@ static const TargetRegisterClass *getInstrOperandRegClass( } void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo, - unsigned InstanceNo, unsigned SrcReg, + bool IsClone, unsigned SrcReg, DenseMap &VRBaseMap) { unsigned VRBase = 0; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Just use the input register directly! - if (InstanceNo > 0) + if (IsClone) VRBaseMap.erase(SDOperand(Node, ResNo)); bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,ResNo),SrcReg)); isNew = isNew; // Silence compiler warning. @@ -463,7 +468,7 @@ void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo, TII->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, DstRC, SrcRC); } - if (InstanceNo > 0) + if (IsClone) VRBaseMap.erase(SDOperand(Node, ResNo)); bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,ResNo), VRBase)); isNew = isNew; // Silence compiler warning. @@ -783,7 +788,7 @@ void ScheduleDAG::EmitSubregNode(SDNode *Node, /// EmitNode - Generate machine code for an node and needed dependencies. /// -void ScheduleDAG::EmitNode(SDNode *Node, unsigned InstanceNo, +void ScheduleDAG::EmitNode(SDNode *Node, bool IsClone, DenseMap &VRBaseMap) { // If machine instruction if (Node->isTargetOpcode()) { @@ -858,7 +863,7 @@ void ScheduleDAG::EmitNode(SDNode *Node, unsigned InstanceNo, for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, InstanceNo, Reg, VRBaseMap); + EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap); } } } else { @@ -906,7 +911,7 @@ void ScheduleDAG::EmitNode(SDNode *Node, unsigned InstanceNo, } case ISD::CopyFromReg: { unsigned SrcReg = cast(Node->getOperand(1))->getReg(); - EmitCopyFromReg(Node, 0, InstanceNo, SrcReg, VRBaseMap); + EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap); break; } case ISD::INLINEASM: { @@ -1118,11 +1123,11 @@ void ScheduleDAG::EmitSchedule() { continue; } for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; ++j) - EmitNode(SU->FlaggedNodes[j], SU->InstanceNo, VRBaseMap); + EmitNode(SU->FlaggedNodes[j], SU->OrigNode != SU, VRBaseMap); if (!SU->Node) EmitCrossRCCopy(SU, CopyVRBaseMap); else - EmitNode(SU->Node, SU->InstanceNo, VRBaseMap); + EmitNode(SU->Node, SU->OrigNode != SU, VRBaseMap); } if (isEntryBB && SchedLiveInCopies) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index c2fae250679..909588cad54 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -177,6 +177,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector NotReady; + Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty() || !PendingQueue.empty()) { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. @@ -319,7 +320,7 @@ public: LatencyPriorityQueue() : Queue(latency_sort(this)) { } - void initNodes(DenseMap > &sumap, + void initNodes(DenseMap &sumap, std::vector &sunits) { SUnits = &sunits; // Calculate node priorities. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index fc20170616a..f07bcd5fb10 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -254,7 +254,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() { continue; SDNode *OpN = SU->Node->getOperand(j).Val; - SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; + SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN]; if (OpSU && OperandSeen.count(OpSU) == 1) { // Ok, so SU is not the last use of OpSU, but SU is two-address so // it will clobber OpSU. Try to commute SU if no other source operands @@ -263,7 +263,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() { for (unsigned k = 0; k < NumOps; ++k) { if (k != j) { OpN = SU->Node->getOperand(k).Val; - OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; + OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN]; if (OpSU && OperandSeen.count(OpSU) == 1) { DoCommute = false; break; @@ -282,7 +282,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (!I->isCtrl) - OperandSeen.insert(I->Dep); + OperandSeen.insert(I->Dep->OrigNode); } } } @@ -660,7 +660,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TryUnfold) { - SmallVector NewNodes; + SmallVector NewNodes; if (!TII->unfoldMemoryOperand(DAG, N, NewNodes)) return NULL; @@ -677,7 +677,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SDOperand(LoadNode, 1)); SUnit *NewSU = CreateNewSUnit(N); - SUnitMap[N].push_back(NewSU); + bool isNew = SUnitMap.insert(std::make_pair(N, NewSU)); + isNew = isNew; + assert(isNew && "Node already inserted!"); + const TargetInstrDesc &TID = TII->get(N->getTargetOpcode()); for (unsigned i = 0; i != TID.getNumOperands(); ++i) { if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { @@ -697,14 +700,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // but it has different alignment or volatileness. bool isNewLoad = true; SUnit *LoadSU; - DenseMap >::iterator SMI = - SUnitMap.find(LoadNode); + DenseMap::iterator SMI = SUnitMap.find(LoadNode); if (SMI != SUnitMap.end()) { - LoadSU = SMI->second.front(); + LoadSU = SMI->second; isNewLoad = false; } else { LoadSU = CreateNewSUnit(LoadNode); - SUnitMap[LoadNode].push_back(LoadSU); + bool isNew = SUnitMap.insert(std::make_pair(LoadNode, LoadSU)); + isNew = isNew; + assert(isNew && "Node already inserted!"); LoadSU->Depth = SU->Depth; LoadSU->Height = SU->Height; @@ -943,7 +947,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { unsigned CurCycle = 0; // Add root to Available queue. if (!SUnits.empty()) { - SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front(); + SUnit *RootSU = SUnitMap[DAG.getRoot().Val]; assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); RootSU->isAvailable = true; AvailableQueue->push(RootSU); @@ -952,6 +956,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. SmallVector NotReady; + Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { bool Delayed = false; DenseMap > LRegsMap; @@ -1174,6 +1179,7 @@ void ScheduleDAGRRList::ListScheduleTopDown() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector NotReady; + Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { SUnit *CurSU = AvailableQueue->pop(); while (CurSU && CurSU->CycleBound > CurCycle) { @@ -1277,7 +1283,7 @@ namespace { RegReductionPriorityQueue() : Queue(SF(this)), currentQueueId(0) {} - virtual void initNodes(DenseMap > &sumap, + virtual void initNodes(DenseMap &sumap, std::vector &sunits) {} virtual void addNode(const SUnit *SU) {} @@ -1327,7 +1333,7 @@ namespace { class VISIBILITY_HIDDEN BURegReductionPriorityQueue : public RegReductionPriorityQueue { // SUnitMap SDNode to SUnit mapping (n -> n). - DenseMap > *SUnitMap; + DenseMap *SUnitMap; // SUnits - The SUnits for the current graph. const std::vector *SUnits; @@ -1343,7 +1349,7 @@ namespace { const TargetRegisterInfo *tri) : TII(tii), TRI(tri), scheduleDAG(NULL) {} - void initNodes(DenseMap > &sumap, + void initNodes(DenseMap &sumap, std::vector &sunits) { SUnitMap = &sumap; SUnits = &sunits; @@ -1414,7 +1420,7 @@ namespace { class VISIBILITY_HIDDEN TDRegReductionPriorityQueue : public RegReductionPriorityQueue { // SUnitMap SDNode to SUnit mapping (n -> n). - DenseMap > *SUnitMap; + DenseMap *SUnitMap; // SUnits - The SUnits for the current graph. const std::vector *SUnits; @@ -1425,7 +1431,7 @@ namespace { public: TDRegReductionPriorityQueue() {} - void initNodes(DenseMap > &sumap, + void initNodes(DenseMap &sumap, std::vector &sunits) { SUnitMap = &sumap; SUnits = &sunits; @@ -1560,7 +1566,7 @@ BURegReductionPriorityQueue::canClobber(const SUnit *SU, const SUnit *Op) { if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { SDNode *DU = SU->Node->getOperand(i).Val; if ((*SUnitMap).find(DU) != (*SUnitMap).end() && - Op == (*SUnitMap)[DU][SU->InstanceNo]) + Op->OrigNode == (*SUnitMap)[DU]) return true; } } @@ -1636,7 +1642,7 @@ void BURegReductionPriorityQueue::AddPseudoTwoAddrDeps() { SDNode *DU = SU->Node->getOperand(j).Val; if ((*SUnitMap).find(DU) == (*SUnitMap).end()) continue; - SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo]; + SUnit *DUSU = (*SUnitMap)[DU]; if (!DUSU) continue; for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); I != E; ++I) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index f9221680198..995d877d9da 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -303,7 +303,7 @@ namespace llvm { GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); if (G->DAG.getRoot().Val && G->SUnitMap.find(G->DAG.getRoot().Val) != G->SUnitMap.end()) - GW.emitEdge(0, -1, G->SUnitMap[G->DAG.getRoot().Val].front(), -1, ""); + GW.emitEdge(0, -1, G->SUnitMap[G->DAG.getRoot().Val], -1, ""); } }; }