MachineCombiner Pass for selecting faster instruction

sequence - target independent framework When the DAGcombiner selects instruction sequences it could increase the critical path or resource len. For example, on arm64 there are multiply-accumulate instructions (madd, msub). If e.g. the equivalent multiply-add sequence is not on the crictial path it makes sense to select it instead of the combined, single accumulate instruction (madd/msub). The reason is that the conversion from add+mul to the madd could lengthen the critical path by the latency of the multiply. But the DAGCombiner would always combine and select the madd/msub instruction. This patch uses machine trace metrics to estimate critical path length and resource length of an original instruction sequence vs a combined instruction sequence and picks the faster code based on its estimates. This patch only commits the target independent framework that evaluates and selects code sequences. The machine instruction combiner is turned off for all targets and expected to evolve over time by gradually handling DAGCombiner pattern in the target specific code. This framework lays the groundwork for fixing rdar://16319955 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214666 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-18 11:24:01 +00:00 · 2014-08-03 21:35:39 +00:00
parent 83eaeba2a8
commit b0b708854e
11 changed files with 588 additions and 18 deletions
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@ -1169,6 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
  return DepCycle;
 }

+/// When bottom is set include instructions in current block in estimate.
 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
@ -1185,7 +1186,9 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

+  /// All instructions before current block
  unsigned Instrs = TBI.InstrDepth;
+  // plus instructions in current block
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
@ -1194,44 +1197,72 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  return std::max(Instrs, PRMax);
 }

-
-unsigned MachineTraceMetrics::Trace::
-getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks,
-                  ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const {
+unsigned MachineTraceMetrics::Trace::getResourceLength(
+    ArrayRef<const MachineBasicBlock *> Extrablocks,
+    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
+    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;
+
+  // Capture computing cycles from extra instructions
+  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
+                            unsigned ResourceIdx)
+                         ->unsigned {
+    unsigned Cycles = 0;
+    for (unsigned I = 0; I != Instrs.size(); ++I) {
+      const MCSchedClassDesc *SC = Instrs[I];
+      if (!SC->isValid())
+        continue;
+      for (TargetSchedModel::ProcResIter
+               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
+           PI != PE; ++PI) {
+        if (PI->ProcResourceIdx != ResourceIdx)
+          continue;
+        Cycles +=
+            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
+      }
+    }
+    return Cycles;
+  };
+
  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (unsigned I = 0; I != Extrablocks.size(); ++I)
      PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
-    for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
-      const MCSchedClassDesc* SC = ExtraInstrs[I];
-      if (!SC->isValid())
-        continue;
-      for (TargetSchedModel::ProcResIter
-             PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
-             PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
-        if (PI->ProcResourceIdx != K)
-          continue;
-        PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
-      }
-    }
+    PRCycles += extraCycles(ExtraInstrs, K);
+    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

+  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+  // Add instruction count from the extra blocks.
  for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
    Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+  Instrs += ExtraInstrs.size();
+  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
 }

+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
+                                              const MachineInstr *UseMI) const {
+  if (DefMI->getParent() == UseMI->getParent())
+    return true;
+
+  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
+  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
+
+  return DepTBI.isUsefulDominator(TBI);
+}
+
 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {