MachineCombiner Pass for selecting faster instruction

sequence -  target independent framework

 When the DAGcombiner selects instruction sequences
 it could increase the critical path or resource len.

 For example, on arm64 there are multiply-accumulate instructions (madd,
 msub). If e.g. the equivalent  multiply-add sequence is not on the
 crictial path it makes sense to select it instead of  the combined,
 single accumulate instruction (madd/msub). The reason is that the
 conversion from add+mul to the madd could lengthen the critical path
 by the latency of the multiply.

 But the DAGCombiner would always combine and select the madd/msub
 instruction.

 This patch uses machine trace metrics to estimate critical path length
 and resource length of an original instruction sequence vs a combined
 instruction sequence and picks the faster code based on its estimates.

 This patch only commits the target independent framework that evaluates
 and selects code sequences. The machine instruction combiner is turned
 off for all targets and expected to evolve over time by gradually
 handling DAGCombiner pattern in the target specific code.

 This framework lays the groundwork for fixing
 rdar://16319955



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214666 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Gerolf Hoflehner
2014-08-03 21:35:39 +00:00
parent 83eaeba2a8
commit b0b708854e
11 changed files with 588 additions and 18 deletions

View File

@ -1169,6 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
return DepCycle;
}
/// When bottom is set include instructions in current block in estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
// Find the limiting processor resource.
// Numbers have been pre-scaled to be comparable.
@ -1185,7 +1186,9 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
/// All instructions before current block
unsigned Instrs = TBI.InstrDepth;
// plus instructions in current block
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
@ -1194,44 +1197,72 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
return std::max(Instrs, PRMax);
}
unsigned MachineTraceMetrics::Trace::
getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks,
ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const {
unsigned MachineTraceMetrics::Trace::getResourceLength(
ArrayRef<const MachineBasicBlock *> Extrablocks,
ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
// Add up resources above and below the center block.
ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
unsigned PRMax = 0;
// Capture computing cycles from extra instructions
auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
unsigned ResourceIdx)
->unsigned {
unsigned Cycles = 0;
for (unsigned I = 0; I != Instrs.size(); ++I) {
const MCSchedClassDesc *SC = Instrs[I];
if (!SC->isValid())
continue;
for (TargetSchedModel::ProcResIter
PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
PI != PE; ++PI) {
if (PI->ProcResourceIdx != ResourceIdx)
continue;
Cycles +=
(PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
}
}
return Cycles;
};
for (unsigned K = 0; K != PRDepths.size(); ++K) {
unsigned PRCycles = PRDepths[K] + PRHeights[K];
for (unsigned I = 0; I != Extrablocks.size(); ++I)
PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
const MCSchedClassDesc* SC = ExtraInstrs[I];
if (!SC->isValid())
continue;
for (TargetSchedModel::ProcResIter
PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
if (PI->ProcResourceIdx != K)
continue;
PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
}
}
PRCycles += extraCycles(ExtraInstrs, K);
PRCycles -= extraCycles(RemoveInstrs, K);
PRMax = std::max(PRMax, PRCycles);
}
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
// Instrs: #instructions in current trace outside current block.
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
// Add instruction count from the extra blocks.
for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
Instrs += ExtraInstrs.size();
Instrs -= RemoveInstrs.size();
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
return std::max(Instrs, PRMax);
}
bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
const MachineInstr *UseMI) const {
if (DefMI->getParent() == UseMI->getParent())
return true;
const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
return DepTBI.isUsefulDominator(TBI);
}
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
OS << getName() << " ensemble:\n";
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {