diff --git a/lib/Target/SparcV9/ModuloScheduling/MSSchedule.cpp b/lib/Target/SparcV9/ModuloScheduling/MSSchedule.cpp index 96662dd8870..5855f0a3cbb 100644 --- a/lib/Target/SparcV9/ModuloScheduling/MSSchedule.cpp +++ b/lib/Target/SparcV9/ModuloScheduling/MSSchedule.cpp @@ -165,12 +165,27 @@ bool MSSchedule::resourcesFree(MSchedGraphNode *node, int cycle) { } -bool MSSchedule::constructKernel(int II, std::vector &branches) { +bool MSSchedule::constructKernel(int II, std::vector &branches, std::map &indVar) { - int stageNum = (schedule.rbegin()->first)/ II; + //Our schedule is allowed to have negative numbers, so lets calculate this offset + int offset = schedule.begin()->first; + if(offset > 0) + offset = 0; + + DEBUG(std::cerr << "Offset: " << offset << "\n"); + + //Not sure what happens in this case, but assert if offset is > II + //assert(offset > -II && "Offset can not be more then II"); + + std::vector > tempKernel; + + + int stageNum = ((schedule.rbegin()->first-offset)+1)/ II; + int maxSN = 0; + DEBUG(std::cerr << "Number of Stages: " << stageNum << "\n"); - for(int index = 0; index < II; ++index) { + for(int index = offset; index < (II+offset); ++index) { int count = 0; for(int i = index; i <= (schedule.rbegin()->first); i+=II) { if(schedule.count(i)) { @@ -179,26 +194,61 @@ bool MSSchedule::constructKernel(int II, std::vector &branches //Check if its a branch if((*I)->isBranch()) { assert(count == 0 && "Branch can not be from a previous iteration"); - kernel.push_back(std::make_pair(*I, count)); + tempKernel.push_back(std::make_pair(*I, count)); } - else + else { //FIXME: Check if the instructions in the earlier stage conflict - kernel.push_back(std::make_pair(*I, count)); + tempKernel.push_back(std::make_pair(*I, count)); + maxSN = std::max(maxSN, count); + } } } ++count; } } - - //Push on branches. Branch vector is in order of last branch to first. - for(std::vector::reverse_iterator B = branches.rbegin() , BE = branches.rend(); B != BE; ++B) { - kernel.push_back(std::make_pair(*B, 0)); + + //Add in induction var code + for(std::vector >::iterator I = tempKernel.begin(), IE = tempKernel.end(); + I != IE; ++I) { + //Add indVar instructions before this one for the current iteration + if(I->second == 0) { + std::map tmpMap; + + //Loop over induction variable instructions in the map that come before this instr + for(std::map::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) { + + + if(N->second < I->first->getIndex()) + tmpMap[N->second] = (MachineInstr*) N->first; + } + + //Add to kernel, and delete from indVar + for(std::map::iterator N = tmpMap.begin(), NE = tmpMap.end(); N != NE; ++N) { + kernel.push_back(std::make_pair(N->second, 0)); + indVar.erase(N->second); + } + } + + kernel.push_back(std::make_pair((MachineInstr*) I->first->getInst(), I->second)); + } - if(stageNum > 0) - maxStage = stageNum; - else - maxStage = 0; + std::map tmpMap; + + //Add remaining invar instructions + for(std::map::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) { + tmpMap[N->second] = (MachineInstr*) N->first; + } + + //Add to kernel, and delete from indVar + for(std::map::iterator N = tmpMap.begin(), NE = tmpMap.end(); N != NE; ++N) { + kernel.push_back(std::make_pair(N->second, 0)); + indVar.erase(N->second); + } + + + maxStage = maxSN; + return true; } @@ -214,7 +264,7 @@ void MSSchedule::print(std::ostream &os) const { } os << "Kernel:\n"; - for(std::vector >::const_iterator I = kernel.begin(), + for(std::vector >::const_iterator I = kernel.begin(), E = kernel.end(); I != E; ++I) os << "Node: " << *(I->first) << " Stage: " << I->second << "\n"; } diff --git a/lib/Target/SparcV9/ModuloScheduling/MSSchedule.h b/lib/Target/SparcV9/ModuloScheduling/MSSchedule.h index b94ab3eb54e..16cbab13f29 100644 --- a/lib/Target/SparcV9/ModuloScheduling/MSSchedule.h +++ b/lib/Target/SparcV9/ModuloScheduling/MSSchedule.h @@ -16,6 +16,7 @@ #include "MSchedGraph.h" #include +#include namespace llvm { @@ -30,7 +31,7 @@ namespace llvm { bool resourcesFree(MSchedGraphNode*, int); //Resulting kernel - std::vector > kernel; + std::vector > kernel; //Max stage count int maxStage; @@ -44,8 +45,8 @@ namespace llvm { bool insert(MSchedGraphNode *node, int cycle); int getStartCycle(MSchedGraphNode *node); void clear() { schedule.clear(); resourceNumPerCycle.clear(); kernel.clear(); } - std::vector >* getKernel() { return &kernel; } - bool constructKernel(int II, std::vector &branches); + std::vector >* getKernel() { return &kernel; } + bool constructKernel(int II, std::vector &branches, std::map &indVar); int getMaxStage() { return maxStage; } @@ -56,8 +57,8 @@ namespace llvm { schedule_iterator end() { return schedule.end(); }; void print(std::ostream &os) const; - typedef std::vector >::iterator kernel_iterator; - typedef std::vector >::const_iterator kernel_const_iterator; + typedef std::vector >::iterator kernel_iterator; + typedef std::vector >::const_iterator kernel_const_iterator; kernel_iterator kernel_begin() { return kernel.begin(); } kernel_iterator kernel_end() { return kernel.end(); } diff --git a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp index 9ac38b4dd3c..7b4680ebe21 100644 --- a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp +++ b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp @@ -7,8 +7,11 @@ // //===----------------------------------------------------------------------===// // -// A graph class for dependencies -// +// A graph class for dependencies. This graph only contains true, anti, and +// output data dependencies for a given MachineBasicBlock. Dependencies +// across iterations are also computed. Unless data dependence analysis +// is provided, a conservative approach of adding dependencies between all +// loads and stores is taken. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ModuloSched" @@ -22,8 +25,11 @@ #include "llvm/Support/Debug.h" #include #include +#include + using namespace llvm; +//MSchedGraphNode constructor MSchedGraphNode::MSchedGraphNode(const MachineInstr* inst, MSchedGraph *graph, unsigned idx, unsigned late, bool isBranch) @@ -33,6 +39,7 @@ MSchedGraphNode::MSchedGraphNode(const MachineInstr* inst, graph->addNode(inst, this); } +//MSchedGraphNode copy constructor MSchedGraphNode::MSchedGraphNode(const MSchedGraphNode &N) : Predecessors(N.Predecessors), Successors(N.Successors) { @@ -44,10 +51,13 @@ MSchedGraphNode::MSchedGraphNode(const MSchedGraphNode &N) } +//Print the node (instruction and latency) void MSchedGraphNode::print(std::ostream &os) const { os << "MSchedGraphNode: Inst=" << *Inst << ", latency= " << latency << "\n"; } + +//Get the edge from a predecessor to this node MSchedGraphEdge MSchedGraphNode::getInEdge(MSchedGraphNode *pred) { //Loop over all the successors of our predecessor //return the edge the corresponds to this in edge @@ -60,6 +70,7 @@ MSchedGraphEdge MSchedGraphNode::getInEdge(MSchedGraphNode *pred) { abort(); } +//Get the iteration difference for the edge from this node to its successor unsigned MSchedGraphNode::getIteDiff(MSchedGraphNode *succ) { for(std::vector::iterator I = Successors.begin(), E = Successors.end(); I != E; ++I) { @@ -69,7 +80,7 @@ unsigned MSchedGraphNode::getIteDiff(MSchedGraphNode *succ) { return 0; } - +//Get the index into the vector of edges for the edge from pred to this node unsigned MSchedGraphNode::getInEdgeNum(MSchedGraphNode *pred) { //Loop over all the successors of our predecessor //return the edge the corresponds to this in edge @@ -83,6 +94,8 @@ unsigned MSchedGraphNode::getInEdgeNum(MSchedGraphNode *pred) { assert(0 && "Should have found edge between this node and its predecessor!"); abort(); } + +//Determine if succ is a successor of this node bool MSchedGraphNode::isSuccessor(MSchedGraphNode *succ) { for(succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) if(*I == succ) @@ -90,7 +103,7 @@ bool MSchedGraphNode::isSuccessor(MSchedGraphNode *succ) { return false; } - +//Dtermine if pred is a predecessor of this node bool MSchedGraphNode::isPredecessor(MSchedGraphNode *pred) { if(std::find( Predecessors.begin(), Predecessors.end(), pred) != Predecessors.end()) return true; @@ -98,7 +111,7 @@ bool MSchedGraphNode::isPredecessor(MSchedGraphNode *pred) { return false; } - +//Add a node to the graph void MSchedGraph::addNode(const MachineInstr *MI, MSchedGraphNode *node) { @@ -109,6 +122,7 @@ void MSchedGraph::addNode(const MachineInstr *MI, GraphMap[MI] = node; } +//Delete a node to the graph void MSchedGraph::deleteNode(MSchedGraphNode *node) { //Delete the edge to this node from all predecessors @@ -123,7 +137,10 @@ void MSchedGraph::deleteNode(MSchedGraphNode *node) { } -MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ) +//Create a graph for a machine block. The ignoreInstrs map is so that we ignore instructions +//associated to the index variable since this is a special case in Modulo Scheduling. +//We only want to deal with the body of the loop. +MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ, AliasAnalysis &AA, TargetData &TD, std::map &ignoreInstrs) : BB(bb), Target(targ) { //Make sure BB is not null, @@ -132,9 +149,13 @@ MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ) //DEBUG(std::cerr << "Constructing graph for " << bb << "\n"); //Create nodes and edges for this BB - buildNodesAndEdges(); + buildNodesAndEdges(AA, TD, ignoreInstrs); + + //Experimental! + //addBranchEdges(); } +//Copies the graph and keeps a map from old to new nodes MSchedGraph::MSchedGraph(const MSchedGraph &G, std::map &newNodes) : BB(G.BB), Target(G.Target) { @@ -169,13 +190,86 @@ MSchedGraph::MSchedGraph(const MSchedGraph &G, std::mapsecond; } -void MSchedGraph::buildNodesAndEdges() { + +//Experimental code to add edges from the branch to all nodes dependent upon it. +void hasPath(MSchedGraphNode *node, std::set &visited, + std::set &branches, MSchedGraphNode *startNode, + std::set > &newEdges ) { + + visited.insert(node); + DEBUG(std::cerr << "Visiting: " << *node << "\n"); + //Loop over successors + for(unsigned i = 0; i < node->succ_size(); ++i) { + MSchedGraphEdge *edge = node->getSuccessor(i); + MSchedGraphNode *dest = edge->getDest(); + if(branches.count(dest)) + newEdges.insert(std::make_pair(dest, startNode)); + + //only visit if we have not already + else if(!visited.count(dest)) { + if(edge->getIteDiff() == 0) + hasPath(dest, visited, branches, startNode, newEdges);} + + } + +} + +//Experimental code to add edges from the branch to all nodes dependent upon it. +void MSchedGraph::addBranchEdges() { + std::set branches; + std::set nodes; + + for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I) { + if(I->second->isBranch()) + if(I->second->hasPredecessors()) + branches.insert(I->second); + } + + //See if there is a path first instruction to the branches, if so, add an + //iteration dependence between that node and the branch + std::set > newEdges; + for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I) { + std::set visited; + hasPath((I->second), visited, branches, (I->second), newEdges); + } + + //Spit out all edges we are going to add + unsigned min = GraphMap.size(); + if(newEdges.size() == 1) { + ((newEdges.begin())->first)->addOutEdge(((newEdges.begin())->second), + MSchedGraphEdge::BranchDep, + MSchedGraphEdge::NonDataDep, 1); + } + else { + + unsigned count = 0; + MSchedGraphNode *start; + MSchedGraphNode *end; + for(std::set >::iterator I = newEdges.begin(), E = newEdges.end(); I != E; ++I) { + + DEBUG(std::cerr << "Branch Edge from: " << *(I->first) << " to " << *(I->second) << "\n"); + + // if(I->second->getIndex() <= min) { + start = I->first; + end = I->second; + //min = I->second->getIndex(); + //} + start->addOutEdge(end, + MSchedGraphEdge::BranchDep, + MSchedGraphEdge::NonDataDep, 1); + } + } +} + + +//Add edges between the nodes +void MSchedGraph::buildNodesAndEdges(AliasAnalysis &AA, TargetData &TD, std::map &ignoreInstrs) { //Get Machine target information for calculating latency const TargetInstrInfo *MTI = Target.getInstrInfo(); @@ -190,6 +284,13 @@ void MSchedGraph::buildNodesAndEdges() { //Loop over instructions in MBB and add nodes and edges for (MachineBasicBlock::const_iterator MI = BB->begin(), e = BB->end(); MI != e; ++MI) { + + //Ignore indvar instructions + if(ignoreInstrs.count(MI)) { + ++index; + continue; + } + //Get each instruction of machine basic block, get the delay //using the op code, create a new node for it, and add to the //graph. @@ -262,7 +363,6 @@ void MSchedGraph::buildNodesAndEdges() { DEBUG(std::cerr << "Read Operation in a PHI node\n"); continue; } - if (const Value* srcI = mOp.getVRegValue()) { @@ -274,7 +374,7 @@ void MSchedGraph::buildNodesAndEdges() { //those instructions //to this one we are processing if(V != valuetoNodeMap.end()) { - addValueEdges(V->second, node, mOp.isUse(), mOp.isDef()); + addValueEdges(V->second, node, mOp.isUse(), mOp.isDef(), phiInstrs); //Add to value map V->second.push_back(std::make_pair(i,node)); @@ -295,14 +395,16 @@ void MSchedGraph::buildNodesAndEdges() { if(const PHINode *PN = dyn_cast(I)) { MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(PN); for (unsigned j = 0; j < tempMvec.size(); j++) { - DEBUG(std::cerr << "Inserting phi instr into map: " << *tempMvec[j] << "\n"); - phiInstrs.push_back((MachineInstr*) tempMvec[j]); + if(!ignoreInstrs.count(tempMvec[j])) { + DEBUG(std::cerr << "Inserting phi instr into map: " << *tempMvec[j] << "\n"); + phiInstrs.push_back((MachineInstr*) tempMvec[j]); + } } } } - addMemEdges(memInstructions); + addMemEdges(memInstructions, AA, TD); addMachRegEdges(regNumtoNodeMap); //Finally deal with PHI Nodes and Value* @@ -324,28 +426,30 @@ void MSchedGraph::buildNodesAndEdges() { //Get Operand const MachineOperand &mOp = (*I)->getOperand(i); if((mOp.getType() == MachineOperand::MO_VirtualRegister || mOp.getType() == MachineOperand::MO_CCRegister) && mOp.isUse()) { + //find the value in the map if (const Value* srcI = mOp.getVRegValue()) { - + //Find value in the map std::map >::iterator V - = valuetoNodeMap.find(srcI); - + = valuetoNodeMap.find(srcI); + //If there is something in the map already, add edges from //those instructions //to this one we are processing if(V != valuetoNodeMap.end()) { - addValueEdges(V->second, node, mOp.isUse(), mOp.isDef(), 1); + addValueEdges(V->second, node, mOp.isUse(), mOp.isDef(), phiInstrs, 1); } } } } - } -} + } +} +//Add dependencies for Value*s void MSchedGraph::addValueEdges(std::vector &NodesInMap, MSchedGraphNode *destNode, bool nodeIsUse, - bool nodeIsDef, int diff) { + bool nodeIsDef, std::vector &phiInstrs, int diff) { for(std::vector::iterator I = NodesInMap.begin(), E = NodesInMap.end(); I != E; ++I) { @@ -354,26 +458,34 @@ void MSchedGraph::addValueEdges(std::vector &NodesInMap, MSchedGraphNode *srcNode = I->second; MachineOperand mOp = srcNode->getInst()->getOperand(I->first); + if(diff > 0) + if(std::find(phiInstrs.begin(), phiInstrs.end(), srcNode->getInst()) == phiInstrs.end()) + continue; + //Node is a Def, so add output dep. if(nodeIsDef) { if(mOp.isUse()) { + DEBUG(std::cerr << "Edge from " << *srcNode << " to " << *destNode << " (itediff=" << diff << ", type=anti)\n"); srcNode->addOutEdge(destNode, MSchedGraphEdge::ValueDep, MSchedGraphEdge::AntiDep, diff); } if(mOp.isDef()) { + DEBUG(std::cerr << "Edge from " << *srcNode << " to " << *destNode << " (itediff=" << diff << ", type=output)\n"); srcNode->addOutEdge(destNode, MSchedGraphEdge::ValueDep, MSchedGraphEdge::OutputDep, diff); } } if(nodeIsUse) { - if(mOp.isDef()) + if(mOp.isDef()) { + DEBUG(std::cerr << "Edge from " << *srcNode << " to " << *destNode << " (itediff=" << diff << ", type=true)\n"); srcNode->addOutEdge(destNode, MSchedGraphEdge::ValueDep, MSchedGraphEdge::TrueDep, diff); + } } } } - +//Add dependencies for machine registers across iterations void MSchedGraph::addMachRegEdges(std::map >& regNumtoNodeMap) { //Loop over all machine registers in the map, and add dependencies //between the instructions that use it @@ -469,7 +581,9 @@ void MSchedGraph::addMachRegEdges(std::map >& } -void MSchedGraph::addMemEdges(const std::vector& memInst) { +//Add edges between all loads and stores +//Can be less strict with alias analysis and data dependence analysis. +void MSchedGraph::addMemEdges(const std::vector& memInst, AliasAnalysis &AA, TargetData &TD) { //Get Target machine instruction info const TargetInstrInfo *TMI = Target.getInstrInfo(); @@ -478,51 +592,132 @@ void MSchedGraph::addMemEdges(const std::vector& memInst) { //Knowing that they are in execution, add true, anti, and output dependencies for (unsigned srcIndex = 0; srcIndex < memInst.size(); ++srcIndex) { + MachineInstr *srcInst = (MachineInstr*) memInst[srcIndex]->getInst(); + //Get the machine opCode to determine type of memory instruction - MachineOpCode srcNodeOpCode = memInst[srcIndex]->getInst()->getOpcode(); + MachineOpCode srcNodeOpCode = srcInst->getOpcode(); + //All instructions after this one in execution order have an iteration delay of 0 for(unsigned destIndex = srcIndex + 1; destIndex < memInst.size(); ++destIndex) { - - //source is a Load, so add anti-dependencies (store after load) - if(TMI->isLoad(srcNodeOpCode)) - if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::AntiDep); - + + MachineInstr *destInst = (MachineInstr*) memInst[destIndex]->getInst(); + + //Add Anti dependencies (store after load) + //Source is a Load + if(TMI->isLoad(srcNodeOpCode)) { + + //Destination is a store + if(TMI->isStore(destInst->getOpcode())) { + + //Get the Value* that we are reading from the load, always the first op + const MachineOperand &mOp = srcInst->getOperand(0); + assert((mOp.isUse() && (mOp.getType() == MachineOperand::MO_VirtualRegister)) && "Assumed first operand was a use and a value*\n"); + + //Get the value* for the store + const MachineOperand &mOp2 = destInst->getOperand(0); + assert(mOp2.getType() == MachineOperand::MO_VirtualRegister && "Assumed first operand was a value*\n"); + + //Only add the edge if we can't verify that they do not alias + if(AA.alias(mOp2.getVRegValue(), + (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()), + mOp.getVRegValue(), + (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType())) + != AliasAnalysis::NoAlias) { + + //Add edge from load to store + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::AntiDep); + } + } + } + //If source is a store, add output and true dependencies if(TMI->isStore(srcNodeOpCode)) { - if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::OutputDep); - else - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::TrueDep); + + //Get the Value* that we are reading from the store (src), always the first op + const MachineOperand &mOp = srcInst->getOperand(0); + assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Assumed first operand was a use and a value*\n"); + + //Get the Value* that we are reading from the load, always the first op + const MachineOperand &mOp2 = srcInst->getOperand(0); + assert((mOp2.isUse() && (mOp2.getType() == MachineOperand::MO_VirtualRegister)) && "Assumed first operand was a use and a value*\n"); + + //Only add the edge if we can't verify that they do not alias + if(AA.alias(mOp2.getVRegValue(), + (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()), + mOp.getVRegValue(), + (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType())) + != AliasAnalysis::NoAlias) { + + if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::OutputDep); + else + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::TrueDep); + } } } //All instructions before the src in execution order have an iteration delay of 1 for(unsigned destIndex = 0; destIndex < srcIndex; ++destIndex) { + + MachineInstr *destInst = (MachineInstr*) memInst[destIndex]->getInst(); + //source is a Load, so add anti-dependencies (store after load) - if(TMI->isLoad(srcNodeOpCode)) - if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::AntiDep, 1); - if(TMI->isStore(srcNodeOpCode)) { - if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::OutputDep, 1); - else - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::TrueDep, 1); - } + if(TMI->isLoad(srcNodeOpCode)) { + //Get the Value* that we are reading from the load, always the first op + const MachineOperand &mOp = srcInst->getOperand(0); + assert((mOp.isUse() && (mOp.getType() == MachineOperand::MO_VirtualRegister)) && "Assumed first operand was a use and a value*\n"); + //Get the value* for the store + const MachineOperand &mOp2 = destInst->getOperand(0); + assert(mOp2.getType() == MachineOperand::MO_VirtualRegister && "Assumed first operand was a value*\n"); + + //Only add the edge if we can't verify that they do not alias + if(AA.alias(mOp2.getVRegValue(), + (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()), + mOp.getVRegValue(), + (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType())) + != AliasAnalysis::NoAlias) { + if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::AntiDep, 1); + } + } + if(TMI->isStore(srcNodeOpCode)) { + + //Get the Value* that we are reading from the store (src), always the first op + const MachineOperand &mOp = srcInst->getOperand(0); + assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Assumed first operand was a use and a value*\n"); + + //Get the Value* that we are reading from the load, always the first op + const MachineOperand &mOp2 = srcInst->getOperand(0); + assert((mOp2.isUse() && (mOp2.getType() == MachineOperand::MO_VirtualRegister)) && "Assumed first operand was a use and a value*\n"); + + //Only add the edge if we can't verify that they do not alias + if(AA.alias(mOp2.getVRegValue(), + (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()), + mOp.getVRegValue(), + (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType())) + != AliasAnalysis::NoAlias) { + + if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::OutputDep, 1); + else + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::TrueDep, 1); + } + } + } } diff --git a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h index 4a341ef3c66..ebff43e5900 100644 --- a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h +++ b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h @@ -7,37 +7,45 @@ // //===----------------------------------------------------------------------===// // -// A graph class for dependencies -// +// A graph class for dependencies. This graph only contains true, anti, and +// output data dependencies for a given MachineBasicBlock. Dependencies +// across iterations are also computed. Unless data dependence analysis +// is provided, a conservative approach of adding dependencies between all +// loads and stores is taken. //===----------------------------------------------------------------------===// #ifndef LLVM_MSCHEDGRAPH_H #define LLVM_MSCHEDGRAPH_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator" #include - namespace llvm { + class MSchedGraph; class MSchedGraphNode; template class MSchedGraphNodeIterator; - + //MSchedGraphEdge encapsulates the data dependence between nodes. It + //identifies the dependence type, on what, and the iteration + //difference struct MSchedGraphEdge { enum DataDepOrderType { TrueDep, AntiDep, OutputDep, NonDataDep }; enum MSchedGraphEdgeType { - MemoryDep, ValueDep, MachineRegister + MemoryDep, ValueDep, MachineRegister, BranchDep }; + //Get or set edge data MSchedGraphNode *getDest() const { return dest; } unsigned getIteDiff() { return iteDiff; } unsigned getDepOrderType() { return depOrderType; } @@ -55,6 +63,9 @@ namespace llvm { unsigned iteDiff; }; + //MSchedGraphNode represents a machine instruction and its + //corresponding latency. Each node also contains a list of its + //predecessors and sucessors. class MSchedGraphNode { const MachineInstr* Inst; //Machine Instruction @@ -63,9 +74,8 @@ namespace llvm { unsigned latency; //Latency of Instruction bool isBranchInstr; //Is this node the branch instr or not - std::vector Predecessors; //Predecessor Nodes - std::vector Successors; + std::vector Successors; //Successor edges public: MSchedGraphNode(const MachineInstr *inst, MSchedGraph *graph, @@ -73,7 +83,7 @@ namespace llvm { MSchedGraphNode(const MSchedGraphNode &N); - //Iterators + //Iterators - Predecessor and Succussor typedef std::vector::iterator pred_iterator; pred_iterator pred_begin() { return Predecessors.begin(); } pred_iterator pred_end() { return Predecessors.end(); } @@ -83,7 +93,6 @@ namespace llvm { pred_const_iterator pred_begin() const { return Predecessors.begin(); } pred_const_iterator pred_end() const { return Predecessors.end(); } - // Successor iterators. typedef MSchedGraphNodeIterator::const_iterator, const MSchedGraphNode> succ_const_iterator; succ_const_iterator succ_begin() const; @@ -93,39 +102,39 @@ namespace llvm { MSchedGraphNode> succ_iterator; succ_iterator succ_begin(); succ_iterator succ_end(); - unsigned succ_size() { return Successors.size(); } + //Get or set predecessor nodes, or successor edges void setPredecessor(unsigned index, MSchedGraphNode *dest) { Predecessors[index] = dest; } - + MSchedGraphNode* getPredecessor(unsigned index) { return Predecessors[index]; } - + MSchedGraphEdge* getSuccessor(unsigned index) { return &Successors[index]; } - + void deleteSuccessor(MSchedGraphNode *node) { for (unsigned i = 0; i != Successors.size(); ++i) if (Successors[i].getDest() == node) { Successors.erase(Successors.begin()+i); node->Predecessors.erase(std::find(node->Predecessors.begin(), node->Predecessors.end(), this)); - --i; + --i; //Decrease index var since we deleted a node } } - - void addOutEdge(MSchedGraphNode *destination, MSchedGraphEdge::MSchedGraphEdgeType type, unsigned deptype, unsigned diff=0) { Successors.push_back(MSchedGraphEdge(destination, type, deptype,diff)); destination->Predecessors.push_back(this); } + + //General methods to get and set data for the node const MachineInstr* getInst() { return Inst; } MSchedGraph* getParent() { return Parent; } bool hasPredecessors() { return (Predecessors.size() > 0); } @@ -139,11 +148,13 @@ namespace llvm { bool isSuccessor(MSchedGraphNode *); bool isPredecessor(MSchedGraphNode *); bool isBranch() { return isBranchInstr; } + //Debug support void print(std::ostream &os) const; void setParent(MSchedGraph *p) { Parent = p; } }; + //Node iterator for graph generation template class MSchedGraphNodeIterator : public forward_iterator { IteratorType I; // std::vector::iterator or const_iterator @@ -219,6 +230,7 @@ namespace llvm { + //Graph class to represent dependence graph class MSchedGraph { const MachineBasicBlock *BB; //Machine basic block @@ -229,20 +241,26 @@ namespace llvm { //Add Nodes and Edges to this graph for our BB typedef std::pair OpIndexNodePair; - void buildNodesAndEdges(); + void buildNodesAndEdges(AliasAnalysis &AA, TargetData &TD, std::map &ignoreInstrs); void addValueEdges(std::vector &NodesInMap, MSchedGraphNode *node, - bool nodeIsUse, bool nodeIsDef, int diff=0); + bool nodeIsUse, bool nodeIsDef, std::vector &phiInstrs, int diff=0); void addMachRegEdges(std::map >& regNumtoNodeMap); - void addMemEdges(const std::vector& memInst); + void addMemEdges(const std::vector& memInst, AliasAnalysis &AA, TargetData &TD); + void addBranchEdges(); public: - MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ); + MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ, AliasAnalysis &AA, TargetData &TD, + std::map &ignoreInstrs); + + //Copy constructor with maps to link old nodes to new nodes MSchedGraph(const MSchedGraph &G, std::map &newNodes); + + //Deconstructor! ~MSchedGraph(); - //Add Nodes to the Graph + //Add or delete nodes from the Graph void addNode(const MachineInstr* MI, MSchedGraphNode *node); void deleteNode(MSchedGraphNode *node); @@ -256,21 +274,23 @@ namespace llvm { unsigned size() { return GraphMap.size(); } reverse_iterator rbegin() { return GraphMap.rbegin(); } reverse_iterator rend() { return GraphMap.rend(); } + + //Get Target or original machine basic block const TargetMachine* getTarget() { return &Target; } const MachineBasicBlock* getBB() { return BB; } }; - - static MSchedGraphNode& getSecond(std::pair &Pair) { - return *Pair.second; - } + // Provide specializations of GraphTraits to be able to use graph - // iterators on the scheduling graph! - // + // iterators on the scheduling graph + static MSchedGraphNode& getSecond(std::pair &Pair) { + return *Pair.second; + } + template <> struct GraphTraits { typedef MSchedGraphNode NodeType; typedef MSchedGraphNode::succ_iterator ChildIteratorType; @@ -361,8 +381,6 @@ namespace llvm { return map_iterator(((MSchedGraph*)G)->end(), DerefFun(getSecond)); } }; - - } #endif diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp index f2e442e4866..5441d3cec47 100644 --- a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp +++ b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "ModuloSched" #include "ModuloScheduling.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFunction.h" @@ -131,6 +132,9 @@ namespace llvm { }; } + +#include + /// ModuloScheduling::runOnFunction - main transformation entry point /// The Swing Modulo Schedule algorithm has three basic steps: /// 1) Computation and Analysis of the dependence graph @@ -138,7 +142,8 @@ namespace llvm { /// 3) Scheduling /// bool ModuloSchedulingPass::runOnFunction(Function &F) { - + alarm(300); + bool Changed = false; int numMS = 0; @@ -147,7 +152,9 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { //Get MachineFunction MachineFunction &MF = MachineFunction::get(&F); - + AliasAnalysis &AA = getAnalysis(); + TargetData &TD = getAnalysis(); + //Worklist std::vector Worklist; @@ -169,6 +176,9 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { //Print out BB for debugging DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr)); + //Print out LLVM BB + DEBUG(std::cerr << "ModuloScheduling LLVMBB: \n"; (*BI)->getBasicBlock()->print(std::cerr)); + //Catch the odd case where we only have TmpInstructions and no real Value*s if(!CreateDefMap(*BI)) { //Clear out our maps for the next basic block that is processed @@ -181,7 +191,7 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { continue; } - MSchedGraph *MSG = new MSchedGraph(*BI, target); + MSchedGraph *MSG = new MSchedGraph(*BI, target, AA, TD, indVarInstrs[*BI]); //Write Graph out to file DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG)); @@ -242,7 +252,7 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { }); //Finally schedule nodes - bool haveSched = computeSchedule(); + bool haveSched = computeSchedule(*BI); //Print out final schedule DEBUG(schedule.print(std::cerr)); @@ -278,7 +288,8 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { //delete(llvmBB); //delete(*BI); } - + + alarm(0); return Changed; } @@ -345,7 +356,10 @@ bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { //Get Target machine instruction info const TargetInstrInfo *TMI = target.getInstrInfo(); - //Check each instruction and look for calls + //Check each instruction and look for calls, keep map to get index later + std::map indexMap; + + unsigned count = 0; for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { //Get opcode to check instruction type MachineOpCode OC = I->getOpcode(); @@ -360,7 +374,111 @@ bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi || OC == V9::MOVFNEr || OC == V9::MOVFNEi) return false; + + indexMap[I] = count; + + if(TMI->isNop(OC)) + continue; + + ++count; } + + //Apply a simple pattern match to make sure this loop can be modulo scheduled + //This means only loops with a branch associated to the iteration count + + //Get the branch + BranchInst *b = dyn_cast(((BasicBlock*) BI->getBasicBlock())->getTerminator()); + + //Get the condition for the branch (we already checked if it was conditional) + Value *cond = b->getCondition(); + + DEBUG(std::cerr << "Condition: " << *cond << "\n"); + + //List of instructions associated with induction variable + std::set indVar; + std::vector stack; + + BasicBlock *BB = (BasicBlock*) BI->getBasicBlock(); + + //Add branch + indVar.insert(b); + + if(Instruction *I = dyn_cast(cond)) + if(I->getParent() == BB) { + if (!assocIndVar(I, indVar, stack, BB)) + return false; + } + else + return false; + else + return false; + + //The indVar set must be >= 3 instructions for this loop to match (FIX ME!) + if(indVar.size() < 3 ) + return false; + + //Dump out instructions associate with indvar for debug reasons + DEBUG(for(std::set::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) { + std::cerr << **N << "\n"; + }); + + //Convert list of LLVM Instructions to list of Machine instructions + std::map mIndVar; + for(std::set::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) { + MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(*N); + for (unsigned j = 0; j < tempMvec.size(); j++) { + MachineOpCode OC = (tempMvec[j])->getOpcode(); + if(TMI->isNop(OC)) + continue; + if(!indexMap.count(tempMvec[j])) + continue; + mIndVar[(MachineInstr*) tempMvec[j]] = indexMap[(MachineInstr*) tempMvec[j]]; + DEBUG(std::cerr << *(tempMvec[j]) << " at index " << indexMap[(MachineInstr*) tempMvec[j]] << "\n"); + } + } + + //Must have some guts to the loop body + if(mIndVar.size() >= (BI->size()-2)) + return false; + + //Put into a map for future access + indVarInstrs[BI] = mIndVar; + + return true; +} + +bool ModuloSchedulingPass::assocIndVar(Instruction *I, std::set &indVar, + std::vector &stack, BasicBlock *BB) { + + stack.push_back(I); + + //If this is a phi node, check if its the canonical indvar + if(PHINode *PN = dyn_cast(I)) { + if (Instruction *Inc = + dyn_cast(PN->getIncomingValueForBlock(BB))) + if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) + if (ConstantInt *CI = dyn_cast(Inc->getOperand(1))) + if (CI->equalsInt(1)) { + //We have found the indvar, so add the stack, and inc instruction to the set + indVar.insert(stack.begin(), stack.end()); + indVar.insert(Inc); + stack.pop_back(); + return true; + } + return false; + } + else { + //Loop over each of the instructions operands, check if they are an instruction and in this BB + for(unsigned i = 0; i < I->getNumOperands(); ++i) { + if(Instruction *N = dyn_cast(I->getOperand(i))) { + if(N->getParent() == BB) + if(!assocIndVar(N, indVar, stack, BB)) + return false; + } + } + } + + stack.pop_back(); return true; } @@ -444,7 +562,7 @@ int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) { findAllCircuits(graph, MII); int RecMII = 0; - for(std::set > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) { + for(std::set > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) { RecMII = std::max(RecMII, I->first); } @@ -508,6 +626,8 @@ bool ModuloSchedulingPass::ignoreEdge(MSchedGraphNode *srcNode, MSchedGraphNode bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode))); + DEBUG(std::cerr << "Ignoring edge? from: " << *srcNode << " to " << *destNode << "\n"); + return findEdge; } @@ -785,14 +905,21 @@ bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vector::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) { totalDelay += (*N)->getLatency(); if(lastN) { - totalDistance += (*N)->getInEdge(lastN).getIteDiff(); - } + int iteDiff = (*N)->getInEdge(lastN).getIteDiff(); + totalDistance += iteDiff; + if(iteDiff > 0) { + start = lastN; + end = *N; + } + } //Get the original node lastN = *N; recc.push_back(newNodes[*N]); @@ -807,10 +934,17 @@ bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vectorgetInEdgeNum(newNodes[lastN]))); - + if(start && end) { + //Insert reccurrence into the list + DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n"); + edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start]))); + } + else { + //Insert reccurrence into the list + DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n"); + edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN]))); + + } //Adjust II until we get close to the inequality delay - II*distance <= 0 int RecMII = II; //Starting value int value = totalDelay-(RecMII * totalDistance); @@ -903,7 +1037,7 @@ void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) { //Ignore self loops if(nextSCC.size() > 1) { - + //Get least vertex in Vk if(!s) { s = nextSCC[0]; @@ -1053,16 +1187,52 @@ void ModuloSchedulingPass::searchPath(MSchedGraphNode *node, if(PO->count(*S)) { nodesToAdd.insert(*S); } - searchPath(*S, path, nodesToAdd); + //terminate + else + searchPath(*S, path, nodesToAdd); } - } //Pop Node off the path path.pop_back(); } +void ModuloSchedulingPass::pathToRecc(MSchedGraphNode *node, + std::vector &path, + std::set &poSet, + std::set &lastNodes) { + //Push node onto the path + path.push_back(node); + DEBUG(std::cerr << "Current node: " << *node << "\n"); + + //Loop over all successors and see if there is a path from this node to + //a recurrence in the partial order, if so.. add all nodes to be added to recc + for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; + ++S) { + DEBUG(std::cerr << "Succ:" << **S << "\n"); + //Check if we should ignore this edge first + if(ignoreEdge(node,*S)) + continue; + + if(poSet.count(*S)) { + DEBUG(std::cerr << "Found path to recc from no pred\n"); + //Loop over path, if it exists in lastNodes, then add to poset, and remove from lastNodes + for(std::vector::iterator I = path.begin(), IE = path.end(); I != IE; ++I) { + if(lastNodes.count(*I)) { + DEBUG(std::cerr << "Inserting node into recc: " << **I << "\n"); + poSet.insert(*I); + lastNodes.erase(*I); + } + } + } + else + pathToRecc(*S, path, poSet, lastNodes); + } + + //Pop Node off the path + path.pop_back(); +} void ModuloSchedulingPass::computePartialOrder() { @@ -1095,7 +1265,7 @@ void ModuloSchedulingPass::computePartialOrder() { //Check if its a branch, and remove to handle special if(!found) { - if((*N)->isBranch()) { + if((*N)->isBranch() && !(*N)->hasPredecessors()) { branches.push_back(*N); } else @@ -1112,8 +1282,8 @@ void ModuloSchedulingPass::computePartialOrder() { //Add nodes that connect this recurrence to recurrences in the partial path for(std::set::iterator N = new_recurrence.begin(), - NE = new_recurrence.end(); N != NE; ++N) - searchPath(*N, path, nodesToAdd); + NE = new_recurrence.end(); N != NE; ++N) + searchPath(*N, path, nodesToAdd); //Add nodes to this recurrence if they are not already in the partial order for(std::set::iterator N = nodesToAdd.begin(), NE = nodesToAdd.end(); @@ -1138,6 +1308,7 @@ void ModuloSchedulingPass::computePartialOrder() { //Add any nodes that are not already in the partial order //Add them in a set, one set per connected component std::set lastNodes; + std::set noPredNodes; for(std::map::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { @@ -1150,23 +1321,42 @@ void ModuloSchedulingPass::computePartialOrder() { found = true; } if(!found) { - if(I->first->isBranch()) { + if(I->first->isBranch() && !I->first->hasPredecessors()) { if(std::find(branches.begin(), branches.end(), I->first) == branches.end()) branches.push_back(I->first); } - else + else { lastNodes.insert(I->first); + if(!I->first->hasPredecessors()) + noPredNodes.insert(I->first); + } } } + //For each node w/out preds, see if there is a path to one of the + //recurrences, and if so add them to that current recc + /*for(std::set::iterator N = noPredNodes.begin(), NE = noPredNodes.end(); + N != NE; ++N) { + DEBUG(std::cerr << "No Pred Path from: " << **N << "\n"); + for(std::vector >::iterator PO = partialOrder.begin(), + PE = partialOrder.end(); PO != PE; ++PO) { + std::vector path; + pathToRecc(*N, path, *PO, lastNodes); + } + }*/ + + //Break up remaining nodes that are not in the partial order - //into their connected compoenents - while(lastNodes.size() > 0) { - std::set ccSet; - connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes); - if(ccSet.size() > 0) - partialOrder.push_back(ccSet); - } + ///into their connected compoenents + /*while(lastNodes.size() > 0) { + std::set ccSet; + connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes); + if(ccSet.size() > 0) + partialOrder.push_back(ccSet); + }*/ + if(lastNodes.size() > 0) + partialOrder.push_back(lastNodes); + //Clean up branches by putting them in final order std::map branchOrder; @@ -1184,7 +1374,7 @@ void ModuloSchedulingPass::connectedComponentSet(MSchedGraphNode *node, std::set //Add to final set if( !ccSet.count(node) && lastNodes.count(node)) { lastNodes.erase(node); -if(node->isBranch()) + if(node->isBranch() && !node->hasPredecessors()) FinalNodeOrder.push_back(node); else ccSet.insert(node); @@ -1463,7 +1653,7 @@ void ModuloSchedulingPass::orderNodes() { //return FinalNodeOrder; } -bool ModuloSchedulingPass::computeSchedule() { +bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB) { TIME_REGION(X, "computeSchedule"); @@ -1487,8 +1677,17 @@ bool ModuloSchedulingPass::computeSchedule() { int LateStart = 99999; //Set to something higher then we would ever expect (FIXME) bool hasSucc = false; bool hasPred = false; - - if(!(*I)->isBranch()) { + bool sched; + + if((*I)->isBranch()) + if((*I)->hasPredecessors()) + sched = true; + else + sched = false; + else + sched = true; + + if(sched) { //Loop over nodes in the schedule and determine if they are predecessors //or successors of the node we are trying to schedule for(MSSchedule::schedule_iterator nodesByCycle = schedule.begin(), nodesByCycleEnd = schedule.end(); @@ -1528,8 +1727,8 @@ bool ModuloSchedulingPass::computeSchedule() { B != BE; ++B) { if((*I)->isPredecessor(*B)) { int diff = (*I)->getInEdge(*B).getIteDiff(); - int ES_Temp = (II+count) + (*B)->getLatency() - diff * II; - DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count) << "\n"); + int ES_Temp = (II+count-1) + (*B)->getLatency() - diff * II; + DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count)-1 << "\n"); DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n"); EarlyStart = std::max(EarlyStart, ES_Temp); hasPred = true; @@ -1537,8 +1736,8 @@ bool ModuloSchedulingPass::computeSchedule() { if((*I)->isSuccessor(*B)) { int diff = (*B)->getInEdge(*I).getIteDiff(); - int LS_Temp = (II+count) - (*I)->getLatency() + diff * II; - DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count) << "\n"); + int LS_Temp = (II+count-1) - (*I)->getLatency() + diff * II; + DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count-1) << "\n"); DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n"); LateStart = std::min(LateStart, LS_Temp); hasSucc = true; @@ -1562,12 +1761,12 @@ bool ModuloSchedulingPass::computeSchedule() { success = scheduleNode(*I, LateStart, (LateStart - II +1)); else if(hasPred && hasSucc) { if(EarlyStart > LateStart) { - //success = false; - LateStart = EarlyStart; + success = false; + //LateStart = EarlyStart; DEBUG(std::cerr << "Early Start can not be later then the late start cycle, schedule fails\n"); } - //else - success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1))); + else + success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1))); } else success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1); @@ -1583,13 +1782,14 @@ bool ModuloSchedulingPass::computeSchedule() { if(success) { DEBUG(std::cerr << "Constructing Schedule Kernel\n"); - success = schedule.constructKernel(II, branches); + success = schedule.constructKernel(II, branches, indVarInstrs[BB]); DEBUG(std::cerr << "Done Constructing Schedule Kernel\n"); if(!success) { ++IncreasedII; ++II; schedule.clear(); } + DEBUG(std::cerr << "Final II: " << II << "\n"); } if(II >= capII) { @@ -1610,12 +1810,12 @@ bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode *node, DEBUG(std::cerr << *node << " (Start Cycle: " << start << ", End Cycle: " << end << ")\n"); //Make sure start and end are not negative - if(start < 0) { - start = 0; + //if(start < 0) { + //start = 0; - } - if(end < 0) - end = 0; + //} + //if(end < 0) + //end = 0; bool forward = true; if(start > end) @@ -1652,7 +1852,7 @@ bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode *node, return success; } -void ModuloSchedulingPass::writePrologues(std::vector &prologues, MachineBasicBlock *origBB, std::vector &llvm_prologues, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation) { +void ModuloSchedulingPass::writePrologues(std::vector &prologues, MachineBasicBlock *origBB, std::vector &llvm_prologues, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation) { //Keep a map to easily know whats in the kernel std::map > inKernel; @@ -1671,15 +1871,9 @@ void ModuloSchedulingPass::writePrologues(std::vector &prol for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { maxStageCount = std::max(maxStageCount, I->second); - //Ignore the branch, we will handle this separately - if(I->first->isBranch()) { - branches.push_back(I->first); - continue; - } - //Put int the map so we know what instructions in each stage are in the kernel - DEBUG(std::cerr << "Inserting instruction " << *(I->first->getInst()) << " into map at stage " << I->second << "\n"); - inKernel[I->second].insert(I->first->getInst()); + DEBUG(std::cerr << "Inserting instruction " << *(I->first) << " into map at stage " << I->second << "\n"); + inKernel[I->second].insert(I->first); } //Get target information to look at machine operands @@ -1691,18 +1885,23 @@ void ModuloSchedulingPass::writePrologues(std::vector &prol MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); DEBUG(std::cerr << "i=" << i << "\n"); - for(int j = 0; j <= i; ++j) { + for(int j = i; j >= 0; --j) { for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { if(inKernel[j].count(&*MI)) { MachineInstr *instClone = MI->clone(); machineBB->push_back(instClone); - + + //If its a branch, insert a nop + if(mii->isBranch(instClone->getOpcode())) + BuildMI(machineBB, V9::NOP, 0); + + DEBUG(std::cerr << "Cloning: " << *MI << "\n"); - Instruction *tmp; - //After cloning, we may need to save the value that this instruction defines for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) { + Instruction *tmp; + //get machine operand MachineOperand &mOp = instClone->getOperand(opNum); if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { @@ -1724,8 +1923,15 @@ void ModuloSchedulingPass::writePrologues(std::vector &prol DEBUG(std::cerr << "Machine Instr Operands: " << *(mOp.getVRegValue()) << ", 0, " << *tmp << "\n"); //Create machine instruction and put int machineBB - MachineInstr *saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); - + MachineInstr *saveValue; + if(mOp.getVRegValue()->getType() == Type::FloatTy) + saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else if(mOp.getVRegValue()->getType() == Type::DoubleTy) + saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else + saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + + DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n"); } } @@ -1758,14 +1964,14 @@ void ModuloSchedulingPass::writePrologues(std::vector &prol } - for(std::vector::iterator BR = branches.begin(), BE = branches.end(); BR != BE; ++BR) { + /*for(std::vector::iterator BR = branches.begin(), BE = branches.end(); BR != BE; ++BR) { //Stick in branch at the end machineBB->push_back((*BR)->getInst()->clone()); //Add nop BuildMI(machineBB, V9::NOP, 0); - } + }*/ (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB); @@ -1774,18 +1980,18 @@ void ModuloSchedulingPass::writePrologues(std::vector &prol } } -void ModuloSchedulingPass::writeEpilogues(std::vector &epilogues, const MachineBasicBlock *origBB, std::vector &llvm_epilogues, std::map > &valuesToSave, std::map > &newValues,std::map &newValLocation, std::map > &kernelPHIs ) { +void ModuloSchedulingPass::writeEpilogues(std::vector &epilogues, const MachineBasicBlock *origBB, std::vector &llvm_epilogues, std::map > &valuesToSave, std::map > &newValues,std::map &newValLocation, std::map > &kernelPHIs ) { std::map > inKernel; for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { //Ignore the branch, we will handle this separately - if(I->first->isBranch()) - continue; + //if(I->first->isBranch()) + //continue; //Put int the map so we know what instructions in each stage are in the kernel - inKernel[I->second].insert(I->first->getInst()); + inKernel[I->second].insert(I->first); } std::map valPHIs; @@ -1827,7 +2033,7 @@ void ModuloSchedulingPass::writeEpilogues(std::vector &epil if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) { - DEBUG(std::cerr << "Writing PHI for " << *(mOp.getVRegValue()) << "\n"); + DEBUG(std::cerr << "Writing PHI for " << (mOp.getVRegValue()) << "\n"); //If this is the last instructions for the max iterations ago, don't update operands if(inEpilogue.count(mOp.getVRegValue())) @@ -1843,6 +2049,9 @@ void ModuloSchedulingPass::writeEpilogues(std::vector &epil MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); tempMvec.addTemp((Value*) tmp); + //assert of no kernelPHI for this value + assert(kernelPHIs[mOp.getVRegValue()][i] !=0 && "Must have final kernel phi to construct epilogue phi"); + MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(newValues[mOp.getVRegValue()][i]).addReg(kernelPHIs[mOp.getVRegValue()][i]).addRegDef(tmp); DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); valPHIs[mOp.getVRegValue()] = tmp; @@ -1872,7 +2081,7 @@ void ModuloSchedulingPass::writeEpilogues(std::vector &epil } } -void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation, std::map > &kernelPHIs) { +void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation, std::map > &kernelPHIs) { //Keep track of operands that are read and saved from a previous iteration. The new clone //instruction will use the result of the phi instead. @@ -1882,26 +2091,32 @@ void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *ma //Branches are a special case std::vector branches; - //Create TmpInstructions for the final phis - for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { + //Get target information to look at machine operands + const TargetInstrInfo *mii = target.getInstrInfo(); + + //Create TmpInstructions for the final phis + for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { - DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first->getInst()) << "\n";); + DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first) << "\n";); - if(I->first->isBranch()) { + /*if(I->first->isBranch()) { //Clone instruction const MachineInstr *inst = I->first->getInst(); MachineInstr *instClone = inst->clone(); branches.push_back(instClone); continue; - } + }*/ //Clone instruction - const MachineInstr *inst = I->first->getInst(); + const MachineInstr *inst = I->first; MachineInstr *instClone = inst->clone(); //Insert into machine basic block machineBB->push_back(instClone); + if(mii->isBranch(instClone->getOpcode())) + BuildMI(machineBB, V9::NOP, 0); + DEBUG(std::cerr << "Cloned Inst: " << *instClone << "\n"); //Loop over Machine Operands @@ -1953,7 +2168,14 @@ void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *ma tempVec.addTemp((Value*) tmp); //Create new machine instr and put in MBB - MachineInstr *saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + MachineInstr *saveValue; + if(mOp.getVRegValue()->getType() == Type::FloatTy) + saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else if(mOp.getVRegValue()->getType() == Type::DoubleTy) + saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else + saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + //Save for future cleanup kernelValue[mOp.getVRegValue()] = tmp; @@ -1994,9 +2216,10 @@ void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *ma if(V->second.size() == 1) { assert(kernelValue[V->first] != 0 && "Kernel value* must exist to create phi"); MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(),V9::PHI, 3).addReg(V->second.begin()->second).addReg(kernelValue[V->first]).addRegDef(finalPHIValue[V->first]); - DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); - kernelPHIs[V->first][schedule.getMaxStage()-1] = kernelValue[V->first]; - } + DEBUG(std::cerr << "Resulting PHI (one live): " << *saveValue << "\n"); + kernelPHIs[V->first][V->second.begin()->first] = kernelValue[V->first]; + DEBUG(std::cerr << "Put kernel phi in at stage: " << schedule.getMaxStage()-1 << " (map stage = " << V->second.begin()->first << ")\n"); + } else { //Keep track of last phi created. @@ -2099,7 +2322,13 @@ void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vect if(TMI->isBranch(opc) || TMI->isNop(opc)) continue; else { - BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + if(mOp.getVRegValue()->getType() == Type::FloatTy) + BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else if(mOp.getVRegValue()->getType() == Type::DoubleTy) + BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else + BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + break; } @@ -2110,7 +2339,14 @@ void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vect //Remove the phi and replace it with an OR DEBUG(std::cerr << "Def: " << mOp << "\n"); //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue())); - BuildMI(*kernelBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); + if(tmp->getType() == Type::FloatTy) + BuildMI(*kernelBB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); + else if(tmp->getType() == Type::DoubleTy) + BuildMI(*kernelBB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); + else + BuildMI(*kernelBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); + + worklist.push_back(std::make_pair(kernelBB, I)); } @@ -2162,7 +2398,14 @@ void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vect if(TMI->isBranch(opc) || TMI->isNop(opc)) continue; else { - BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + if(mOp.getVRegValue()->getType() == Type::FloatTy) + BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else if(mOp.getVRegValue()->getType() == Type::DoubleTy) + BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); + else + BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); + + break; } @@ -2172,7 +2415,13 @@ void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vect else { //Remove the phi and replace it with an OR DEBUG(std::cerr << "Def: " << mOp << "\n"); - BuildMI(**MB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); + if(tmp->getType() == Type::FloatTy) + BuildMI(**MB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); + else if(tmp->getType() == Type::DoubleTy) + BuildMI(**MB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); + else + BuildMI(**MB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); + worklist.push_back(std::make_pair(*MB,I)); } @@ -2213,7 +2462,7 @@ void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock *BB) { DEBUG(std::cerr << "Reconstructing Loop\n"); //First find the value *'s that we need to "save" - std::map > valuesToSave; + std::map > valuesToSave; //Keep track of instructions we have already seen and their stage because //we don't want to "save" values if they are used in the kernel immediately @@ -2226,7 +2475,7 @@ void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock *BB) { if(I->second !=0) { //For this instruction, get the Value*'s that it reads and put them into the set. //Assert if there is an operand of another type that we need to save - const MachineInstr *inst = I->first->getInst(); + const MachineInstr *inst = I->first; lastInstrs[inst] = I->second; for(unsigned i=0; i < inst->getNumOperands(); ++i) { diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h index b2d9ccb9d22..b36c5b2c20a 100644 --- a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h +++ b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h @@ -17,6 +17,8 @@ #include "MSSchedule.h" #include "llvm/Function.h" #include "llvm/Pass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetData.h" #include namespace llvm { @@ -42,6 +44,9 @@ namespace llvm { //Map to hold Value* defs std::map defMap; + //Map to hold list of instructions associate to the induction var for each BB + std::map > indVarInstrs; + //LLVM Instruction we know we can add TmpInstructions to its MCFI Instruction *defaultInst; @@ -69,6 +74,8 @@ namespace llvm { //Internal functions bool CreateDefMap(MachineBasicBlock *BI); bool MachineBBisValid(const MachineBasicBlock *BI); + bool assocIndVar(Instruction *I, std::set &indVar, + std::vector &stack, BasicBlock *BB); int calculateResMII(const MachineBasicBlock *BI); int calculateRecMII(MSchedGraph *graph, int MII); void calculateNodeAttributes(MSchedGraph *graph, int MII); @@ -101,9 +108,13 @@ namespace llvm { std::vector &path, std::set &nodesToAdd); + void pathToRecc(MSchedGraphNode *node, + std::vector &path, + std::set &poSet, std::set &lastNodes); + void computePartialOrder(); - bool computeSchedule(); + bool computeSchedule(const MachineBasicBlock *BB); bool scheduleNode(MSchedGraphNode *node, int start, int end); @@ -116,12 +127,12 @@ namespace llvm { void fixBranches(std::vector &prologues, std::vector &llvm_prologues, MachineBasicBlock *machineBB, BasicBlock *llvmBB, std::vector &epilogues, std::vector &llvm_epilogues, MachineBasicBlock*); - void writePrologues(std::vector &prologues, MachineBasicBlock *origBB, std::vector &llvm_prologues, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation); + void writePrologues(std::vector &prologues, MachineBasicBlock *origBB, std::vector &llvm_prologues, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation); - void writeEpilogues(std::vector &epilogues, const MachineBasicBlock *origBB, std::vector &llvm_epilogues, std::map > &valuesToSave,std::map > &newValues, std::map &newValLocation, std::map > &kernelPHIs); + void writeEpilogues(std::vector &epilogues, const MachineBasicBlock *origBB, std::vector &llvm_epilogues, std::map > &valuesToSave,std::map > &newValues, std::map &newValLocation, std::map > &kernelPHIs); - void writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation, std::map > &kernelPHIs); + void writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map > &valuesToSave, std::map > &newValues, std::map &newValLocation, std::map > &kernelPHIs); void removePHIs(const MachineBasicBlock *origBB, std::vector &prologues, std::vector &epilogues, MachineBasicBlock *kernelBB, std::map &newValLocation); @@ -131,6 +142,13 @@ namespace llvm { ModuloSchedulingPass(TargetMachine &targ) : target(targ) {} virtual bool runOnFunction(Function &F); virtual const char* getPassName() const { return "ModuloScheduling"; } + + // getAnalysisUsage + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + } + }; }