misched: Use the TargetSchedModel interface wherever possible.

Allows the new machine model to be used for NumMicroOps and OutputLatency.

Allows the HazardRecognizer to be disabled along with itineraries.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165603 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Andrew Trick
Date: 2012-10-10 05:43:09 +00:00
commit 412cd2f813
parent 6312cb0997
12 changed files with 126 additions and 115 deletions
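In outline, the change retires direct itinerary lookups such as InstrItins->SchedModel->IssueWidth in favor of queries on TargetSchedModel, which answers from whichever description the subtarget provides. The standalone C++ sketch below (not LLVM code; all names are invented for illustration) models the fallback order used by the TargetSchedModel::getNumMicroOps added in this commit: itineraries first, then the per-operand machine model, then a safe default.

#include <iostream>

// Stand-in for the TargetSchedModel query pattern. A value of -1
// means the corresponding description is absent for this subtarget.
struct SchedModelSketch {
  int ItinUOps = -1;      // micro-ops per the itineraries, if any
  int ModelUOps = -1;     // micro-ops per the new machine model, if any
  unsigned IssueWidth = 1;

  unsigned getNumMicroOps() const {
    if (ItinUOps >= 0)
      return ItinUOps;    // itineraries take precedence
    if (ModelUOps >= 0)
      return ModelUOps;   // otherwise the new machine model
    return 1;             // otherwise assume a single micro-op
  }
};

int main() {
  SchedModelSketch SM;
  SM.ModelUOps = 2;
  SM.IssueWidth = 4;
  std::cout << SM.getNumMicroOps() << " uops, width "
            << SM.IssueWidth << "\n";
}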

View File

@@ -31,7 +31,6 @@
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
namespace llvm {
@@ -278,19 +277,6 @@ public:
return RegionCriticalPSets;
}
/// getIssueWidth - Return the max instructions per scheduling group.
unsigned getIssueWidth() const {
return (InstrItins && InstrItins->SchedModel)
? InstrItins->SchedModel->IssueWidth : 1;
}
/// getNumMicroOps - Return the number of issue slots required for this MI.
unsigned getNumMicroOps(MachineInstr *MI) const {
if (!InstrItins) return 1;
int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
}
protected:
// Top-Level entry points for the schedule() driver...

View File

@@ -570,11 +570,6 @@ namespace llvm {
unsigned VerifyScheduledDAG(bool isBottomUp);
#endif
protected:
/// ComputeLatency - Compute node latency.
///
virtual void computeLatency(SUnit *SU) = 0;
private:
// Return the MCInstrDesc of this SDNode or NULL.
const MCInstrDesc *getNodeDesc(const SDNode *Node) const;

View File

@@ -111,7 +111,6 @@ namespace llvm {
const MachineLoopInfo &MLI;
const MachineDominatorTree &MDT;
const MachineFrameInfo *MFI;
const InstrItineraryData *InstrItins;
/// Live Intervals provides reaching defs in preRA scheduling.
LiveIntervals *LIS;
@@ -187,6 +186,9 @@ namespace llvm {
virtual ~ScheduleDAGInstrs() {}
/// \brief Get the machine model for instruction scheduling.
const TargetSchedModel *getSchedModel() const { return &SchedModel; }
/// begin - Return an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator begin() const { return RegionBegin; }
@@ -227,10 +229,6 @@ namespace llvm {
/// used by instructions in the fallthrough block.
void addSchedBarrierDeps();
/// computeLatency - Compute node latency.
///
virtual void computeLatency(SUnit *SU);
/// schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///

View File

@@ -55,12 +55,29 @@ public:
/// latency properties, but separate from the per-cycle itinerary data.
bool hasInstrSchedModel() const;
const MCSchedModel *getMCSchedModel() const { return &SchedModel; }
/// \brief Return true if this machine model includes cycle-to-cycle itinerary
/// data.
///
/// This models scheduling at each stage in the processor pipeline.
bool hasInstrItineraries() const;
const InstrItineraryData *getInstrItineraries() const {
if (hasInstrItineraries())
return &InstrItins;
return 0;
}
/// \brief Identify the processor corresponding to the current subtarget.
unsigned getProcessorID() const { return SchedModel.getProcessorID(); }
/// \brief Maximum number of micro-ops that may be scheduled per cycle.
unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
/// \brief Return the number of issue slots required for this MI.
unsigned getNumMicroOps(MachineInstr *MI) const;
/// \brief Compute operand latency based on the available machine model.
///
/// Compute and return the latency of the given data dependent def and use
@@ -82,11 +99,12 @@ public:
/// occasionally useful to help estimate instruction cost.
unsigned computeInstrLatency(const MachineInstr *MI) const;
/// \brief Identify the processor corresponding to the current subtarget.
unsigned getProcessorID() const { return SchedModel.getProcessorID(); }
/// \brief Output dependency latency of a pair of defs of the same register.
///
/// This is typically one cycle.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *DepMI) const;
/// \brief Maximum number of micro-ops that may be scheduled per cycle.
unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
private:
/// getDefLatency is a helper for computeOperandLatency. Return the

View File

@@ -801,15 +801,6 @@ public:
const MachineInstr *UseMI, unsigned UseIdx,
bool FindMin = false) const;
/// getOutputLatency - Compute and return the output dependency latency of a
/// given pair of defs which both target the same register. This is usually
/// one.
virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *DepMI) const {
return 1;
}
/// getInstrLatency - Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
/// PredCost.

View File

@@ -652,6 +652,7 @@ class ConvergingScheduler : public MachineSchedStrategy {
/// of "hazards" and other interlocks at the current cycle.
struct SchedBoundary {
ScheduleDAGMI *DAG;
const TargetSchedModel *SchedModel;
ReadyQueue Available;
ReadyQueue Pending;
@@ -671,13 +672,18 @@ class ConvergingScheduler : public MachineSchedStrategy {
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(0), Available(ID, Name+".A"),
DAG(0), SchedModel(0), Available(ID, Name+".A"),
Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
~SchedBoundary() { delete HazardRec; }
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel) {
DAG = dag;
SchedModel = smodel;
}
bool isTop() const {
return Available.getID() == ConvergingScheduler::TopQID;
}
@@ -698,6 +704,7 @@ class ConvergingScheduler : public MachineSchedStrategy {
};
ScheduleDAGMI *DAG;
const TargetSchedModel *SchedModel;
const TargetRegisterInfo *TRI;
// State of the top and bottom scheduled instruction boundaries.
@@ -713,7 +720,7 @@ public:
};
ConvergingScheduler():
DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
virtual void initialize(ScheduleDAGMI *dag);
@@ -740,13 +747,15 @@ protected:
void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
DAG = dag;
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
Top.DAG = dag;
Bot.DAG = dag;
Top.init(DAG, SchedModel);
Bot.init(DAG, SchedModel);
// Initialize the HazardRecognizers.
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
const InstrItineraryData *Itin = TM.getInstrItineraryData();
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
@@ -807,7 +816,8 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
if (HazardRec->isEnabled())
return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
if (IssueCount + uops > SchedModel->getIssueWidth())
return true;
return false;
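With the recognizer disabled (no usable itineraries), the boundary above degrades to a pure issue-width test. A minimal standalone restatement of that fallback, with invented names:

// A node is a structural "hazard" when no hazard recognizer is
// enabled and its micro-ops would overflow the current issue group.
bool wouldOverflowGroup(unsigned IssueCount, unsigned NodeUOps,
                        unsigned IssueWidth) {
  return IssueCount + NodeUOps > IssueWidth;
}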
@@ -828,7 +838,7 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
unsigned Width = DAG->getIssueWidth();
unsigned Width = SchedModel->getIssueWidth();
IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
@@ -866,8 +876,8 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
}
// Check the instruction group dispatch limit.
// TODO: Check if this SU must end a dispatch group.
IssueCount += DAG->getNumMicroOps(SU->getInstr());
if (IssueCount >= DAG->getIssueWidth()) {
IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
if (IssueCount >= SchedModel->getIssueWidth()) {
DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
bumpCycle();
}

View File

@@ -44,8 +44,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineDominatorTree &mdt,
bool IsPostRAFlag,
LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
@@ -292,8 +291,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
if (Kind == SDep::Anti)
DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
else {
unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
DefSU->getInstr());
unsigned AOLat =
SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
}
}
@@ -363,8 +362,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
else {
SUnit *DefSU = DefI->SU;
if (DefSU != SU && DefSU != &ExitSU) {
unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
DefSU->getInstr());
unsigned OutLatency =
SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
}
DefI->SU = SU;
@@ -650,7 +649,7 @@ void ScheduleDAGInstrs::initSUnits() {
SU->isCommutable = MI->isCommutable();
// Assign the Latency field of SU using target-provided information.
computeLatency(SU);
SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
}
}
@@ -911,21 +910,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
PendingLoads.clear();
}
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
// Compute the latency for the node. We only provide a default for missing
// itineraries. Empty itineraries still have latency properties.
if (!InstrItins) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
// extra time.
if (SU->getInstr()->mayLoad())
SU->Latency += 2;
} else {
SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
}
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
SU->getInstr()->dump();
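The default heuristic deleted above is not simply dropped: SU->Latency now comes from SchedModel.computeInstrLatency, which (per the TargetSchedule.cpp hunk below) ends in TII->defaultDefLatency and so appears to keep a comparable loads-cost-extra default. For reference, a standalone sketch of what the removed branch computed:

// What the removed no-itineraries branch assigned (reconstructed
// from the hunk above): unit latency, plus a penalty for loads.
unsigned defaultNodeLatency(bool MayLoad) {
  unsigned Latency = 1;
  if (MayLoad)
    Latency += 2; // simplistic heuristic: loads take extra time
  return Latency;
}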

View File

@@ -14,6 +14,7 @@
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -44,6 +45,17 @@ void TargetSchedModel::init(const MCSchedModel &sm,
STI->initInstrItins(InstrItins);
}
unsigned TargetSchedModel::getNumMicroOps(MachineInstr *MI) const {
if (hasInstrItineraries()) {
int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
}
if (hasInstrSchedModel())
return resolveSchedClass(MI)->NumMicroOps;
return 1;
}
/// If we can determine the operand latency from the def only, without machine
/// model or itinerary lookup, do so. Otherwise return -1.
int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
@@ -209,3 +221,40 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
}
return TII->defaultDefLatency(&SchedModel, MI);
}
unsigned TargetSchedModel::
computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
const MachineInstr *DepMI) const {
// MinLatency == -1 is for in-order processors that always have unit
// MinLatency. MinLatency > 0 is for in-order processors with varying min
// latencies, but since this is not a RAW dep, we always use unit latency.
if (SchedModel.MinLatency != 0)
return 1;
// MinLatency == 0 indicates an out-of-order processor that can dispatch
// WAW dependencies in the same cycle.
// Treat predication as a data dependency for out-of-order cpus. In-order
// cpus do not need to treat predicated writes specially.
//
// TODO: The following hack exists because predication passes do not
// correctly append imp-use operands, and readsReg() strangely returns false
// for predicated defs.
unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
const MachineFunction &MF = *DefMI->getParent()->getParent();
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
return computeInstrLatency(DefMI);
// If we have a per-operand scheduling model, check if this def is writing
// an unbuffered resource. If so, it is treated like an in-order cpu.
if (hasInstrSchedModel()) {
const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
*PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
return 1;
}
}
return 0;
}
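Read as a decision ladder, computeOutputLatency distinguishes four cases. A standalone restatement (invented parameter names; each flag stands in for the corresponding check in the function above):

// WAW (output-dependency) latency, restated from the code above.
unsigned outputLatencySketch(int MinLatency, // MCSchedModel field
                             bool DepPredicatedNonReader,
                             bool DefWritesUnbufferedResource,
                             unsigned DefInstrLatency) {
  if (MinLatency != 0)               // in-order: unit WAW latency
    return 1;
  if (DepPredicatedNonReader)        // predication acts as a data dep
    return DefInstrLatency;
  if (DefWritesUnbufferedResource)   // unbuffered: in-order-like
    return 1;
  return 0;                          // OOO: WAW dispatch in one cycle
}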

View File

@@ -3550,18 +3550,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
unsigned
ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *DepMI) const {
unsigned Reg = DefMI->getOperand(DefIdx).getReg();
if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
return 1;
// If the second MI is predicated, then there is an implicit use dependency.
return getInstrLatency(ItinData, DefMI);
}
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {

View File

@@ -229,10 +229,6 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *DepMI) const;
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;

View File

@@ -128,7 +128,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) {
// If packet is now full, reset the state so in the next cycle
// we start fresh.
if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
if (Packet.size() >= SchedModel->getIssueWidth()) {
ResourcesModel->clearResources();
Packet.clear();
TotalPackets++;
@@ -186,18 +186,20 @@ void VLIWMachineScheduler::schedule() {
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
DAG = static_cast<VLIWMachineScheduler*>(dag);
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
Top.DAG = DAG;
Bot.DAG = DAG;
Top.init(DAG, SchedModel);
Bot.init(DAG, SchedModel);
// Initialize the HazardRecognizers.
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
const InstrItineraryData *Itin = TM.getInstrItineraryData();
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Top.ResourceModel = new VLIWResourceModel(TM);
Bot.ResourceModel = new VLIWResourceModel(TM);
Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
@@ -256,7 +258,8 @@ bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
if (HazardRec->isEnabled())
return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
if (IssueCount + uops > SchedModel->getIssueWidth())
return true;
return false;
@@ -278,7 +281,7 @@ void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
unsigned Width = DAG->getIssueWidth();
unsigned Width = SchedModel->getIssueWidth();
IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
@@ -321,7 +324,7 @@ void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
// Check the instruction group dispatch limit.
// TODO: Check if this SU must end a dispatch group.
IssueCount += DAG->getNumMicroOps(SU->getInstr());
IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
if (startNewCycle) {
DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
bumpCycle();

View File

@@ -45,7 +45,7 @@ class VLIWResourceModel {
/// definition of DFA by a target.
DFAPacketizer *ResourcesModel;
const InstrItineraryData *InstrItins;
const TargetSchedModel *SchedModel;
/// Local packet/bundle model. Purely
/// internal to the MI scheduler at this time.
@@ -55,29 +55,15 @@ class VLIWResourceModel {
unsigned TotalPackets;
public:
VLIWResourceModel(MachineSchedContext *C, const InstrItineraryData *IID) :
InstrItins(IID), TotalPackets(0) {
const TargetMachine &TM = C->MF->getTarget();
VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
SchedModel(SM), TotalPackets(0) {
ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
// This hard requirement could be relaxed,
// but for now do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
Packet.resize(InstrItins->SchedModel->IssueWidth);
Packet.clear();
ResourcesModel->clearResources();
}
VLIWResourceModel(const TargetMachine &TM) :
InstrItins(TM.getInstrItineraryData()), TotalPackets(0) {
ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
// This hard requirement could be relaxed,
// but for now do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
Packet.resize(InstrItins->SchedModel->IssueWidth);
Packet.resize(SchedModel->getIssueWidth());
Packet.clear();
ResourcesModel->clearResources();
}
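The surviving constructor now sizes the packet from the model's issue width rather than from InstrItins. As a rough standalone model of the bundle accounting (the real class also drives a DFA packetizer, which this sketch ignores):

#include <vector>

// Simplified VLIW bundle accounting: a packet holds at most
// IssueWidth entries; filling it closes the bundle.
class PacketSketch {
  std::vector<unsigned> Packet; // ids of units in the current bundle
  unsigned IssueWidth;
  unsigned TotalPackets = 0;

public:
  explicit PacketSketch(unsigned Width) : IssueWidth(Width) {
    Packet.reserve(IssueWidth);
  }
  void add(unsigned SUId) {
    Packet.push_back(SUId);
    if (Packet.size() >= IssueWidth) { // bundle full: start fresh
      Packet.clear();
      ++TotalPackets;
    }
  }
  unsigned packets() const { return TotalPackets; }
};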
@@ -146,6 +132,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
/// of "hazards" and other interlocks at the current cycle.
struct SchedBoundary {
VLIWMachineScheduler *DAG;
const TargetSchedModel *SchedModel;
ReadyQueue Available;
ReadyQueue Pending;
@@ -166,7 +153,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(0), Available(ID, Name+".A"),
DAG(0), SchedModel(0), Available(ID, Name+".A"),
Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
CheckPending(false), HazardRec(0), ResourceModel(0),
CurrCycle(0), IssueCount(0),
@@ -177,6 +164,11 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
delete HazardRec;
}
void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) {
DAG = dag;
SchedModel = smodel;
}
bool isTop() const {
return Available.getID() == ConvergingVLIWScheduler::TopQID;
}
@@ -197,6 +189,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
};
VLIWMachineScheduler *DAG;
const TargetSchedModel *SchedModel;
const TargetRegisterInfo *TRI;
// State of the top and bottom scheduled instruction boundaries.
@@ -212,7 +205,7 @@ public:
};
ConvergingVLIWScheduler():
DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
virtual void initialize(ScheduleDAGMI *dag);