diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 85ab47beb6b..2567a657338 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -85,6 +85,8 @@ namespace llvm { /// the value of the Latency field of the predecessor, however advanced /// models may provide additional information about specific edges. unsigned Latency; + /// Record MinLatency seperately from "expected" Latency. + unsigned MinLatency; public: /// SDep - Construct a null SDep. This is only for use by container @@ -96,7 +98,7 @@ namespace llvm { SDep(SUnit *S, Kind kind, unsigned latency = 1, unsigned Reg = 0, bool isNormalMemory = false, bool isMustAlias = false, bool isArtificial = false) - : Dep(S, kind), Contents(), Latency(latency) { + : Dep(S, kind), Contents(), Latency(latency), MinLatency(latency) { switch (kind) { case Anti: case Output: @@ -135,7 +137,8 @@ namespace llvm { } bool operator==(const SDep &Other) const { - return overlaps(Other) && Latency == Other.Latency; + return overlaps(Other) + && Latency == Other.Latency && MinLatency == Other.MinLatency; } bool operator!=(const SDep &Other) const { @@ -155,6 +158,18 @@ namespace llvm { Latency = Lat; } + /// getMinLatency - Return the minimum latency for this edge. Minimum + /// latency is used for scheduling groups, while normal (expected) latency + /// is for instruction cost and critical path. + unsigned getMinLatency() const { + return MinLatency; + } + + /// setMinLatency - Set the minimum latency for this edge. + void setMinLatency(unsigned Lat) { + MinLatency = Lat; + } + //// getSUnit - Return the SUnit to which this edge points. SUnit *getSUnit() const { return Dep.getPointer(); diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 1bde94215a5..8b52b5a9c70 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -108,6 +108,15 @@ namespace llvm { } }; + /// Record a physical register access. + /// For non data-dependent uses, OpIdx == -1. + struct PhysRegSUOper { + SUnit *SU; + int OpIdx; + + PhysRegSUOper(SUnit *su, int op): SU(su), OpIdx(op) {} + }; + /// Combine a SparseSet with a 1x1 vector to track physical registers. /// The SparseSet allows iterating over the (few) live registers for quickly /// comparing against a regmask or clearing the set. @@ -116,7 +125,7 @@ namespace llvm { /// cleared between scheduling regions without freeing unused entries. class Reg2SUnitsMap { SparseSet PhysRegSet; - std::vector > SUnits; + std::vector > SUnits; public: typedef SparseSet::const_iterator const_iterator; @@ -140,7 +149,7 @@ namespace llvm { /// If this register is mapped, return its existing SUnits vector. /// Otherwise map the register and return an empty SUnits vector. - std::vector &operator[](unsigned Reg) { + std::vector &operator[](unsigned Reg) { bool New = PhysRegSet.insert(Reg).second; assert((!New || SUnits[Reg].empty()) && "stale SUnits vector"); (void)New; @@ -288,16 +297,6 @@ namespace llvm { /// virtual void computeLatency(SUnit *SU); - /// computeOperandLatency - Return dependence edge latency using - /// operand use/def information - /// - /// FindMin may be set to get the minimum vs. expected latency. Minimum - /// latency is used for scheduling groups, while expected latency is for - /// instruction cost and critical path. - virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use, - const SDep& dep, - bool FindMin = false) const; - /// schedule - Order nodes according to selected style, filling /// in the Sequence member. /// @@ -319,7 +318,7 @@ namespace llvm { protected: void initSUnits(); - void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO); + void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); void addPhysRegDeps(SUnit *SU, unsigned OperIdx); void addVRegDefDeps(SUnit *SU, unsigned OperIdx); void addVRegUseDeps(SUnit *SU, unsigned OperIdx); diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index da30ab82d6c..44953a287ad 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -794,20 +794,6 @@ public: const MachineInstr *UseMI, unsigned UseIdx, bool FindMin = false) const; - /// computeOperandLatency - Compute and return the latency of the given data - /// dependent def and use. DefMI must be a valid def. UseMI may be NULL for - /// an unknown use. If the subtarget allows, this may or may not need to call - /// getOperandLatency(). - /// - /// FindMin may be set to get the minimum vs. expected latency. Minimum - /// latency is used for scheduling groups, while expected latency is for - /// instruction cost and critical path. - unsigned computeOperandLatency(const InstrItineraryData *ItinData, - const TargetRegisterInfo *TRI, - const MachineInstr *DefMI, - const MachineInstr *UseMI, - unsigned Reg, bool FindMin) const; - /// getOutputLatency - Compute and return the output dependency latency of a /// a given pair of defs which both target the same register. This is usually /// one. diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index ffb876b3f05..eeaabc7bd4c 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -905,13 +905,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; } Top.releaseNode(SU, SU->TopReadyCycle); } @@ -925,13 +924,12 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; } Bot.releaseNode(SU, SU->BotReadyCycle); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 9c1dba355b4..dfad18b394e 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -209,7 +209,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); addVRegUseDeps(&ExitSU, i); @@ -225,15 +225,15 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); } } } /// MO is an operand of SU's instruction that defines a physical register. Add /// data dependencies from SU to any uses of the physical register. -void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, - const MachineOperand &MO) { +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { + const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. @@ -245,11 +245,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector &UseList = Uses[*Alias]; + std::vector &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; + SUnit *UseSU = UseList[i].SU; if (UseSU == SU) continue; + MachineInstr *UseMI = UseSU->getInstr(); + int UseOp = UseList[i].OpIdx; unsigned LDataLatency = DataLatency; // Optionally add in a special extra latency for nodes that // feed addresses. @@ -258,7 +260,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // adjustSchedDependency for the targets that care about it. if (SpecialAddressLatency != 0 && !UnitLatencies && UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); const MCInstrDesc &UseMCID = UseMI->getDesc(); int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); @@ -273,8 +274,15 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // perform its own adjustments. SDep dep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - unsigned Latency = computeOperandLatency(SU, UseSU, dep); + unsigned Latency = + TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx, + (UseOp < 0 ? 0 : UseMI), UseOp); dep.setLatency(Latency); + unsigned MinLatency = + TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx, + (UseOp < 0 ? 0 : UseMI), UseOp, + /*FindMin=*/true); + dep.setMinLatency(MinLatency); ST.adjustSchedDependency(SU, UseSU, dep); } @@ -301,9 +309,9 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector &DefList = Defs[*Alias]; + std::vector &DefList = Defs[*Alias]; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; + SUnit *DefSU = DefList[i].SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -324,14 +332,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(SU); + Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); } else { - addPhysRegDataDeps(SU, MO); + addPhysRegDataDeps(SU, OperIdx); // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's defs. - std::vector &DefList = Defs[MO.getReg()]; + std::vector &DefList = Defs[MO.getReg()]; // If a def is going to wrap back around to the top of the loop, // backschedule it. @@ -393,11 +401,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // the block. Instead, we leave only one call at the back of the // DefList. if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) + while (!DefList.empty() && DefList.back().SU->isCall) DefList.pop_back(); } // Defs are pushed in the order they are visited and never reordered. - DefList.push_back(SU); + DefList.push_back(PhysRegSUOper(SU, OperIdx)); } } @@ -468,8 +476,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. - unsigned Latency = computeOperandLatency(DefSU, SU, const_cast(dep)); + int DefOp = Def->findRegisterDefOperandIdx(Reg); + unsigned Latency = + TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx); dep.setLatency(Latency); + unsigned MinLatency = + TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx, + /*FindMin=*/true); + dep.setMinLatency(MinLatency); const TargetSubtargetInfo &ST = TM.getSubtarget(); ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); @@ -997,17 +1011,6 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) { } } -unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - const SDep& dep, - bool FindMin) const { - // For a data dependency with a known register... - if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return 1; - - return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), - Use->getInstr(), dep.getReg(), FindMin); -} - void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { SU->getInstr()->dump(); } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index ddee6b24016..59d62bba540 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -645,9 +645,16 @@ static int computeDefOperandLatency( } /// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use when the operand indices are already known. +/// dependent def and use when the operand indices are already known. UseMI may +/// be NULL for an unknown use. /// -/// FindMin may be set to get the minimum vs. expected latency. +/// FindMin may be set to get the minimum vs. expected latency. Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. +/// +/// Depending on the subtarget's itinerary properties, this may or may not need +/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or +/// UseIdx to compute min latency. unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, @@ -660,7 +667,13 @@ computeOperandLatency(const InstrItineraryData *ItinData, assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + int OperLatency = 0; + if (UseMI) + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } if (OperLatency >= 0) return OperLatency; @@ -673,77 +686,3 @@ computeOperandLatency(const InstrItineraryData *ItinData, defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } - -/// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an -/// unknown use. Depending on the subtarget's itinerary properties, this may or -/// may not need to call getOperandLatency(). -/// -/// FindMin may be set to get the minimum vs. expected latency. Minimum -/// latency is used for scheduling groups, while expected latency is for -/// instruction cost and critical path. -/// -/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency. -/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use. -unsigned TargetInstrInfo:: -computeOperandLatency(const InstrItineraryData *ItinData, - const TargetRegisterInfo *TRI, - const MachineInstr *DefMI, const MachineInstr *UseMI, - unsigned Reg, bool FindMin) const { - - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - // Find the definition of the register in the defining instruction. - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6, %D7 = VLD1q16 %R2, 0, ..., %Q3 - // %Q1 = VMULv8i16 %Q1, %Q3, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. - unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - // For all uses of the register, calculate the maxmimum latency - int OperLatency = -1; - - // UseMI is null, then it must be a scheduling barrier. - if (!UseMI) { - unsigned DefClass = DefMI->getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - else { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i); - OperLatency = std::max(OperLatency, UseCycle); - } - } - // If we found an operand latency, we're done. - if (OperLatency >= 0) - return OperLatency; - } - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -}