diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 03d11e2d828..765c26a5941 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -485,7 +485,7 @@ namespace llvm { protected: void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO); - void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap); + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap); /// ForceUnitLatencies - Return true if all scheduling edges should be given a /// latency value of one. The default is to return false; schedulers may diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index d10d670d346..1f40771e3bd 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -36,7 +36,7 @@ void ScheduleDAG::EmitNoop() { TII->insertNoop(*BB, BB->end()); } -void ScheduleDAG::EmitCrossRCCopy(SUnit *SU, +void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap) { for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { @@ -49,12 +49,11 @@ void ScheduleDAG::EmitCrossRCCopy(SUnit *SU, unsigned Reg = 0; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { - if (I->getReg()) { - Reg = I->getReg(); + if (II->getReg()) { + Reg = II->getReg(); break; } } - assert(I->getReg() && "Unknown physical register!"); TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second, SU->CopyDstRC, SU->CopySrcRC); } else { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 113dfb1751d..b86492992c4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -28,7 +28,7 @@ using namespace llvm; STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); -STATISTIC(NumCCCopies, "Number of cross class copies"); +STATISTIC(NumPRCopies, "Number of physical copies"); static RegisterScheduler fastDAGScheduler("fast", "Fast suboptimal list scheduling", @@ -93,10 +93,10 @@ private: void ReleasePred(SUnit *SU, SDep *PredEdge); void ScheduleNodeBottomUp(SUnit*, unsigned); SUnit *CopyAndMoveSuccessors(SUnit*); - void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, - const TargetRegisterClass*, - const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); @@ -361,17 +361,16 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DelDeps.push_back(std::make_pair(SuccSU, D)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - } ++NumDups; return NewSU; } -/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies -/// and move all scheduled successors of the given SUnit to the last copy. -void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector<SUnit*, 2> &Copies) { @@ -408,7 +407,7 @@ void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); - ++NumCCCopies; + ++NumPRCopies; } /// getPhysicalRegisterVT - Returns the ValueType of the physical register @@ -524,19 +523,22 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - SUnit *NewDef = CopyAndMoveSuccessors(LRDef); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; if (!NewDef) { - // Issue expensive cross register class copies. - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); - const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); - const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - if (!DestRC) { - assert(false && "Don't know how to copy this physical register!"); - abort(); - } + // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; - InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DOUT << "Adding an edge from SU # " << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"; AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 65de7a5c8f8..bc5443eaba8 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -35,7 +35,7 @@ using namespace llvm; STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); -STATISTIC(NumCCCopies, "Number of cross class copies"); +STATISTIC(NumPRCopies, "Number of physical register copies"); static RegisterScheduler burrListDAGScheduler("list-burr", @@ -121,10 +121,10 @@ private: void UnscheduleNodeBottomUp(SUnit*); void BacktrackBottomUp(SUnit*, unsigned, unsigned&); SUnit *CopyAndMoveSuccessors(SUnit*); - void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, - const TargetRegisterClass*, - const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleTopDown(); void ListScheduleBottomUp(); @@ -517,11 +517,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return NewSU; } -/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies -/// and move all scheduled successors of the given SUnit to the last copy. -void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, SmallVector<SUnit*, 2> &Copies) { SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; @@ -546,9 +546,8 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, DelDeps.push_back(std::make_pair(SuccSU, *I)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - } AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); @@ -559,7 +558,7 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); - ++NumCCCopies; + ++NumPRCopies; } /// getPhysicalRegisterVT - Returns the ValueType of the physical register @@ -705,27 +704,32 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { } if (!CurSU) { - // Can't backtrack. Try duplicating the nodes that produces these - // "expensive to copy" values to break the dependency. In case even - // that doesn't work, insert cross class copies. + // Can't backtrack. If it's too expensive to copy the value, then try + // duplicate the nodes that produces these "too expensive to copy" + // values to break the dependency. In case even that doesn't work, + // insert cross class copies. + // If it's not too expensive, i.e. cost != -1, issue copies. SUnit *TrySU = NotReady[0]; SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - SUnit *NewDef = CopyAndMoveSuccessors(LRDef); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; if (!NewDef) { - // Issue expensive cross register class copies. - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); - const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); - const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - if (!DestRC) { - assert(false && "Don't know how to copy this physical register!"); - abort(); - } + // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; - InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DOUT << "Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"; AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index a8d904c2f67..c755086a8d5 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -39,11 +39,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { /// CheckForPhysRegDependency - Check if the dependency between def and use of /// a specified operand is a physical register dependency. If so, returns the -/// register. +/// register and the cost of copying the register. static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, - unsigned &PhysReg) { + unsigned &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; @@ -55,8 +55,12 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, if (Def->isMachineOpcode()) { const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && - II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + Cost = RC->getCopyCost(); + } } } @@ -179,10 +183,18 @@ void ScheduleDAGSDNodes::AddSchedEdges() { bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; + int Cost = 1; // Determine if this is a physical register dependency. - CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. + if (Cost >= 0) + PhysReg = 0; SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpSU->Latency, PhysReg)); } @@ -252,10 +264,12 @@ unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { - if (SU->getNode()) - SU->getNode()->dump(DAG); - else - cerr << "CROSS RC COPY "; + if (!SU->getNode()) { + cerr << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); cerr << "\n"; SmallVector<SDNode *, 4> FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp index ebe084d811f..d6179651589 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp @@ -629,6 +629,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // For pre-regalloc scheduling, create instructions corresponding to the // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. + EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + SmallVector<SDNode *, 4> FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); @@ -636,10 +642,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap); FlaggedNodes.pop_back(); } - if (!SU->getNode()) - EmitCrossRCCopy(SU, CopyVRBaseMap); - else - EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap); + EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap); } return BB; diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll new file mode 100644 index 00000000000..0765f86405c --- /dev/null +++ b/test/CodeGen/X86/pr3244.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as < %s | llc -march=x86 +; PR3244 + +@g_62 = external global i16 ; <i16*> [#uses=1] +@g_487 = external global i32 ; <i32*> [#uses=1] + +define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind { +entry: + %0 = load i16* @g_62, align 2 ; <i16> [#uses=1] + %1 = load i32* @g_487, align 4 ; <i32> [#uses=1] + %2 = trunc i16 %0 to i8 ; <i8> [#uses=1] + %3 = trunc i32 %1 to i8 ; <i8> [#uses=1] + %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1) +nounwind ; <i32> [#uses=1] + %5 = trunc i32 %4 to i8 ; <i8> [#uses=1] + %6 = mul i8 %3, %2 ; <i8> [#uses=1] + %7 = mul i8 %6, %5 ; <i8> [#uses=1] + %8 = sext i8 %7 to i16 ; <i16> [#uses=1] + %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind + ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @func_7(...) + +declare i32 @func_85(i16 signext, i32, i32)