Simplify the computeOperandLatency API.

The logic for recomputing latency based on a ScheduleDAG edge was shady. This bypasses the problem by requiring the client to provide operand indices. This ensures consistent use of the machine model's API.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162420 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-26 05:25:47 +00:00 · 2012-08-23 00:39:43 +00:00
parent fc4eafa0f4
commit ffd2526fa4
6 changed files with 83 additions and 143 deletions
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -85,6 +85,8 @@ namespace llvm {
    /// the value of the Latency field of the predecessor, however advanced
    /// models may provide additional information about specific edges.
    unsigned Latency;
    /// Record MinLatency separately from "expected" Latency.
    unsigned MinLatency;
  public:
    /// SDep - Construct a null SDep. This is only for use by container
@@ -96,7 +98,7 @@ namespace llvm {
    SDep(SUnit *S, Kind kind, unsigned latency = 1, unsigned Reg = 0,
         bool isNormalMemory = false, bool isMustAlias = false,
         bool isArtificial = false)
-      : Dep(S, kind), Contents(), Latency(latency) {
+      : Dep(S, kind), Contents(), Latency(latency), MinLatency(latency) {
      switch (kind) {
      case Anti:
      case Output:
@@ -135,7 +137,8 @@ namespace llvm {
    }
    bool operator==(const SDep &Other) const {
-      return overlaps(Other) && Latency == Other.Latency;
+      return overlaps(Other)
        && Latency == Other.Latency && MinLatency == Other.MinLatency;
    }
    bool operator!=(const SDep &Other) const {
@@ -155,6 +158,18 @@ namespace llvm {
      Latency = Lat;
    }
    /// getMinLatency - Read back the minimum latency stored on this edge.
    /// Scheduling groups are formed using the minimum latency; the normal
    /// (expected) latency is the one used for instruction cost and critical
    /// path.
    unsigned getMinLatency() const { return MinLatency; }
    /// setMinLatency - Overwrite the minimum latency stored on this edge
    /// with \p Lat.
    void setMinLatency(unsigned Lat) { MinLatency = Lat; }
    /// getSUnit - Return the SUnit to which this edge points.
    SUnit *getSUnit() const {
      return Dep.getPointer();
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -108,6 +108,15 @@ namespace llvm {
    }
  };
  /// One recorded access to a physical register: the scheduling unit that
  /// touches the register, paired with the index of the operand doing so.
  /// OpIdx is -1 when the access is a non data-dependent use.
  struct PhysRegSUOper {
    SUnit *SU;
    int OpIdx;

    PhysRegSUOper(SUnit *Unit, int OperandIdx)
      : SU(Unit), OpIdx(OperandIdx) {}
  };
  /// Combine a SparseSet with a 1x1 vector to track physical registers.
  /// The SparseSet allows iterating over the (few) live registers for quickly
  /// comparing against a regmask or clearing the set.
@@ -116,7 +125,7 @@ namespace llvm {
  /// cleared between scheduling regions without freeing unused entries.
  class Reg2SUnitsMap {
    SparseSet<unsigned> PhysRegSet;
-    std::vector<std::vector<SUnit*> > SUnits;
+    std::vector<std::vector<PhysRegSUOper> > SUnits;
  public:
    typedef SparseSet<unsigned>::const_iterator const_iterator;
@@ -140,7 +149,7 @@ namespace llvm {
    /// If this register is mapped, return its existing SUnits vector.
    /// Otherwise map the register and return an empty SUnits vector.
-    std::vector<SUnit *> &operator[](unsigned Reg) {
+    std::vector<PhysRegSUOper> &operator[](unsigned Reg) {
      bool New = PhysRegSet.insert(Reg).second;
      assert((!New || SUnits[Reg].empty()) && "stale SUnits vector");
      (void)New;
@@ -288,16 +297,6 @@ namespace llvm {
    ///
    virtual void computeLatency(SUnit *SU);
    /// computeOperandLatency - Return dependence edge latency using
    /// operand use/def information
    ///
    /// FindMin may be set to get the minimum vs. expected latency. Minimum
    /// latency is used for scheduling groups, while expected latency is for
    /// instruction cost and critical path.
    virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use,
                                           const SDep& dep,
                                           bool FindMin = false) const;
    /// schedule - Order nodes according to selected style, filling
    /// in the Sequence member.
    ///
@@ -319,7 +318,7 @@ namespace llvm {
  protected:
    void initSUnits();
-    void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO);
+    void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
    void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
    void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
    void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -794,20 +794,6 @@ public:
                                 const MachineInstr *UseMI, unsigned UseIdx,
                                 bool FindMin = false) const;
  /// computeOperandLatency - Compute and return the latency of the given data
  /// dependent def and use. DefMI must be a valid def. UseMI may be NULL for
  /// an unknown use. If the subtarget allows, this may or may not need to call
  /// getOperandLatency().
  ///
  /// FindMin may be set to get the minimum vs. expected latency. Minimum
  /// latency is used for scheduling groups, while expected latency is for
  /// instruction cost and critical path.
  unsigned computeOperandLatency(const InstrItineraryData *ItinData,
                                 const TargetRegisterInfo *TRI,
                                 const MachineInstr *DefMI,
                                 const MachineInstr *UseMI,
                                 unsigned Reg, bool FindMin) const;
  /// getOutputLatency - Compute and return the output dependency latency of a
  /// given pair of defs which both target the same register. This is usually
  /// one.
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -905,13 +905,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
-    unsigned Latency =
+    unsigned MinLatency = I->getMinLatency();
      DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
 #ifndef NDEBUG
-    Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
 #endif
-    if (SU->TopReadyCycle < PredReadyCycle + Latency)
+    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
-      SU->TopReadyCycle = PredReadyCycle + Latency;
+      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
 }
@@ -925,13 +924,12 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
-    unsigned Latency =
+    unsigned MinLatency = I->getMinLatency();
      DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
 #ifndef NDEBUG
-    Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
 #endif
-    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
-      SU->BotReadyCycle = SuccReadyCycle + Latency;
+      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
 }
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -209,7 +209,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
      if (Reg == 0) continue;
      if (TRI->isPhysicalRegister(Reg))
-        Uses[Reg].push_back(&ExitSU);
+        Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
      else {
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
        addVRegUseDeps(&ExitSU, i);
@@ -225,15 +225,15 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (!Uses.contains(Reg))
-          Uses[Reg].push_back(&ExitSU);
+          Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
      }
  }
 }
 /// MO is an operand of SU's instruction that defines a physical register. Add
 /// data dependencies from SU to any uses of the physical register.
-void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
-                                           const MachineOperand &MO) {
+  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");
  // Ask the target if address-backscheduling is desirable, and if so how much.
@@ -245,11 +245,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
-    std::vector<SUnit*> &UseList = Uses[*Alias];
+    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
-      SUnit *UseSU = UseList[i];
+      SUnit *UseSU = UseList[i].SU;
      if (UseSU == SU)
        continue;
      MachineInstr *UseMI = UseSU->getInstr();
      int UseOp = UseList[i].OpIdx;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
@@ -258,7 +260,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
@@ -273,8 +274,15 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
      // perform its own adjustments.
      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
-        unsigned Latency = computeOperandLatency(SU, UseSU, dep);
+        unsigned Latency =
          TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
                                     (UseOp < 0 ? 0 : UseMI), UseOp);
        dep.setLatency(Latency);
        unsigned MinLatency =
          TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
                                     (UseOp < 0 ? 0 : UseMI), UseOp,
                                     /*FindMin=*/true);
        dep.setMinLatency(MinLatency);
        ST.adjustSchedDependency(SU, UseSU, dep);
      }
@@ -301,9 +309,9 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
       Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
-    std::vector<SUnit *> &DefList = Defs[*Alias];
+    std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
-      SUnit *DefSU = DefList[i];
+      SUnit *DefSU = DefList[i].SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
@@ -324,14 +332,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
-    Uses[MO.getReg()].push_back(SU);
+    Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
  }
  else {
-    addPhysRegDataDeps(SU, MO);
+    addPhysRegDataDeps(SU, OperIdx);
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
-    std::vector<SUnit *> &DefList = Defs[MO.getReg()];
+    std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
@@ -393,11 +401,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
-      while (!DefList.empty() && DefList.back()->isCall)
+      while (!DefList.empty() && DefList.back().SU->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
-    DefList.push_back(SU);
+    DefList.push_back(PhysRegSUOper(SU, OperIdx));
  }
 }
@@ -468,8 +476,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information, then
        // allow the target to perform its own adjustments.
-        unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+        int DefOp = Def->findRegisterDefOperandIdx(Reg);
        unsigned Latency =
          TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx);
        dep.setLatency(Latency);
        unsigned MinLatency =
          TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx,
                                     /*FindMin=*/true);
        dep.setMinLatency(MinLatency);
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
@@ -997,17 +1011,6 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
  }
 }
 /// computeOperandLatency - Latency for the dependence edge \p dep running
 /// from \p Def to \p Use.
 ///
 /// Edges that are not data dependencies, or that carry no register, get a
 /// latency of 1. Every other edge is forwarded to the register-based
 /// TargetInstrInfo::computeOperandLatency query; \p FindMin is passed through
 /// unchanged to select minimum rather than expected latency.
 unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
                                                   const SDep& dep,
                                                   bool FindMin) const {
  // The register is only inspected once the edge is known to be a data
  // dependence, so the order of the two tests matters.
  const bool HasDataReg = dep.getKind() == SDep::Data && dep.getReg() != 0;
  if (!HasDataReg)
    return 1;

  return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
                                    Use->getInstr(), dep.getReg(), FindMin);
 }
 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
 }
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -645,9 +645,16 @@ static int computeDefOperandLatency(
 }
 /// computeOperandLatency - Compute and return the latency of the given data
-/// dependent def and use when the operand indices are already known.
+/// dependent def and use when the operand indices are already known. UseMI may
 /// be NULL for an unknown use.
 ///
-/// FindMin may be set to get the minimum vs. expected latency.
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
 /// latency is used for scheduling groups, while expected latency is for
 /// instruction cost and critical path.
 ///
 /// Depending on the subtarget's itinerary properties, this may or may not need
 /// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
 /// UseIdx to compute min latency.
 unsigned TargetInstrInfo::
 computeOperandLatency(const InstrItineraryData *ItinData,
                      const MachineInstr *DefMI, unsigned DefIdx,
@@ -660,7 +667,13 @@ computeOperandLatency(const InstrItineraryData *ItinData,
  assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
-  int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+  int OperLatency = 0;
  if (UseMI)
    OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
  else {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
  }
  if (OperLatency >= 0)
    return OperLatency;
@@ -673,77 +686,3 @@ computeOperandLatency(const InstrItineraryData *ItinData,
                            defaultDefLatency(ItinData->SchedModel, DefMI));
  return InstrLatency;
 }
 /// computeOperandLatency - Compute and return the latency of the given data
 /// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
 /// unknown use. Depending on the subtarget's itinerary properties, this may or
 /// may not need to call getOperandLatency().
 ///
 /// FindMin may be set to get the minimum vs. expected latency. Minimum
 /// latency is used for scheduling groups, while expected latency is for
 /// instruction cost and critical path.
 ///
 /// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
 /// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
 unsigned TargetInstrInfo::
 computeOperandLatency(const InstrItineraryData *ItinData,
                      const TargetRegisterInfo *TRI,
                      const MachineInstr *DefMI, const MachineInstr *UseMI,
                      unsigned Reg, bool FindMin) const {
  int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
  if (DefLatency >= 0)
    return DefLatency;
  assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
  // Find the definition of the register in the defining instruction.
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    // For all uses of the register, calculate the maxmimum latency
    int OperLatency = -1;
    // UseMI is null, then it must be a scheduling barrier.
    if (!UseMI) {
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
    }
    else {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;
        int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
        OperLatency = std::max(OperLatency, UseCycle);
      }
    }
    // If we found an operand latency, we're done.
    if (OperLatency >= 0)
      return OperLatency;
  }
  // No operand latency was found.
  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
  // Expected latency is the max of the stage latency and itinerary props.
  if (!FindMin)
    InstrLatency = std::max(InstrLatency,
                            defaultDefLatency(ItinData->SchedModel, DefMI));
  return InstrLatency;
 }